Skip to content

Commit

Permalink
Loading data from Big Data Viewer (#3)
Browse files Browse the repository at this point in the history
* Add create_pyramid_bdv_h5 function to create downsample pyramid

* Added function for parsing metadata from mesospim

* Added function to check the mesoSPIM directory

* Added function to extract slice attribute from big data viewer xml files

* Basic tile class

* Basic ImageMosaic class

* Formatting in file_utils.py

* Added a data_for_napari method

* Added napari.yaml

* Basic widget with the ability to load big data view h5 files

* Updated pyproject.toml

* Added method for checking and loading mesospim directory

* Changed header text

* Deleted placeholder test file

* Added tests for tile.py

* Added tests for image_mosaic.py

* Added tests for stitching_widget.py

* Add qt setup for linux in test_and_deploy.yml

* Test only calling make_napari_viewer_proxy

* Remove all tests with make_napari_viewer_proxy

* Just the widget init test

* Copied pyproject.toml from working branch

* Added the entire test_stitching_widget.py file from the working branch

* Added tox.ini configs from the refactor branch to the legacy_tox_ini section in pyproject.toml

* Reverted back to tests relating to functionality of this PR

* Apply suggestions from code review

* Only add 1 to the shape if the dimension is of odd shape

* Added docstrings to the stitching widget

* Added comments for test_image_mosaic.py to describe file setup

* Add default_directory as attribute in stitching widget
  • Loading branch information
IgorTatarnikov authored May 7, 2024
1 parent 493594e commit 68f0d78
Show file tree
Hide file tree
Showing 13 changed files with 1,183 additions and 51 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/test_and_deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,16 @@ jobs:
python-version: "3.10"

steps:
# these libraries enable testing on Qt on linux
- uses: pyvista/setup-headless-display-action@v2
with:
qt: true

# Run tests
- uses: neuroinformatics-unit/actions/test@v2
with:
python-version: ${{ matrix.python-version }}
secret-codecov-token: ${{ secrets.CODECOV_TOKEN }}

build_sdist_wheels:
name: Build source distribution
Expand Down
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
include LICENSE
include README.md
include brainglobe_stitch/napari.yaml
exclude .pre-commit-config.yaml

recursive-include brainglobe_stitch *.py
Expand Down
240 changes: 192 additions & 48 deletions brainglobe_stitch/file_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union

import h5py
import numpy as np
import numpy.typing as npt

HEADERS = [
"[POSITION]",
Expand All @@ -8,70 +14,135 @@
]


def write_big_stitcher_tile_config(meta_file_name: Path) -> List[Dict]:
    """
    Write a BigStitcher tile configuration file next to the given mesoSPIM
    metadata file. Tile positions (in pixels) are written relative to the
    first tile. The parsed metadata is returned for reuse by the caller.

    Parameters
    ----------
    meta_file_name: Path
        The path to the h5_meta.txt file.

    Returns
    -------
    List[Dict]
        A list of dictionaries containing the metadata for each tile
        (one entry per tile/channel combination).
    """
    tile_metadata = parse_mesospim_metadata(meta_file_name)

    # Strip the trailing ".h5_meta.txt" (12 characters) so the config file
    # sits next to the metadata file with a "_tile_config.txt" suffix.
    output_file = str(meta_file_name)[:-12] + "_tile_config.txt"

    # Metadata entries are grouped per tile with one entry per channel;
    # the channel count is how many entries pass before the first laser
    # line repeats.
    first_channel = tile_metadata[0]["Laser"]
    num_channels = 1

    for metadata in tile_metadata[1:]:
        if metadata["Laser"] == first_channel:
            break
        else:
            num_channels += 1

    num_tiles = len(tile_metadata) // num_channels
    tile_xy_locations = []

    # One x/y stage position per tile, converted from microns to pixels.
    for i in range(0, len(tile_metadata), num_channels):
        curr_tile_dict = tile_metadata[i]

        x = round(curr_tile_dict["x_pos"] / curr_tile_dict["Pixelsize in um"])
        y = round(curr_tile_dict["y_pos"] / curr_tile_dict["Pixelsize in um"])

        tile_xy_locations.append((x, y))

    # Positions relative to the first tile.
    # NOTE(review): abs() discards the sign of the offset — confirm that
    # BigStitcher expects magnitudes here rather than signed offsets.
    relative_locations = [(0, 0)]

    for abs_tuple in tile_xy_locations[1:]:
        rel_tuple = (
            abs(abs_tuple[0] - tile_xy_locations[0][0]),
            abs(abs_tuple[1] - tile_xy_locations[0][1]),
        )
        relative_locations.append(rel_tuple)

    # One config line per view; views that share a tile share a position,
    # hence the modulo indexing into the per-tile locations.
    with open(output_file, "w") as f:
        f.write("dim=3\n")
        for i in range(len(tile_metadata)):
            f.write(
                f"{i};;"
                f"({relative_locations[i % num_tiles][0]},"
                f"{relative_locations[i % num_tiles][1]},0)\n"
            )

    return tile_metadata


def parse_mesospim_metadata(meta_file_name: Path):
def create_pyramid_bdv_h5(
    input_file: Path,
    yield_progress: bool = False,
    resolutions_array: Optional[npt.NDArray] = None,
    subdivisions_array: Optional[npt.NDArray] = None,
):
    """
    Create a resolution pyramid for a Big Data Viewer HDF5 file. The function
    assumes no pyramid exists and creates a new one in place. By default,
    the function creates a 5 level pyramid with downsampling factors of 1, 2,
    4, 8, and 16 in x and y, with no downsampling in z. Deletes the old
    resolutions and subdivisions datasets and creates new ones.

    NOTE: because the body contains ``yield``, this function is a generator
    and performs no work until iterated — even with ``yield_progress=False``
    the caller must exhaust the returned generator for the pyramid to be
    written.

    Parameters
    ----------
    input_file: Path
        The path to the input HDF5 file.
    yield_progress: bool, optional
        Whether to yield progress. If True, the function will yield the
        progress as a percentage.
    resolutions_array: npt.NDArray, optional
        The downsampling factors to use for each resolution level.
        This is a 2D array where each row represents a resolution level and
        the columns represent the downsampling factors for x, y, and z.
        Defaults to a 5 level 1/2/4/8/16 pyramid in x and y.
    subdivisions_array: npt.NDArray, optional
        The size of the blocks at each resolution level.
        This is a 2D array where each row represents a resolution level and
        the columns represent the size of the blocks for x, y, and z.
        Defaults to (32, 32, 16) blocks at every level.
    """
    # Build the defaults inside the function instead of using mutable
    # (np.array) default arguments, which are shared across calls.
    if resolutions_array is None:
        resolutions_array = np.array(
            [[1, 1, 1], [2, 2, 1], [4, 4, 1], [8, 8, 1], [16, 16, 1]]
        )
    if subdivisions_array is None:
        subdivisions_array = np.array(
            [[32, 32, 16], [32, 32, 16], [32, 32, 16], [32, 32, 16], [32, 32, 16]]
        )

    with h5py.File(input_file, "r+") as f:
        data_group = f["t00000"]
        num_done = 0
        num_slices = len(data_group)

        for curr_slice in data_group:
            # Delete the old resolutions and subdivisions datasets
            del f[f"{curr_slice}/resolutions"]
            del f[f"{curr_slice}/subdivisions"]
            f[f"{curr_slice}/resolutions"] = resolutions_array
            f[f"{curr_slice}/subdivisions"] = subdivisions_array

            grp: h5py.Group = f[f"t00000/{curr_slice}"]
            # Each level is built by striding over the previous level.
            for i in range(1, resolutions_array.shape[0]):
                downsampling_factors = (
                    resolutions_array[i] // resolutions_array[i - 1]
                )
                prev_resolution = grp[f"{i - 1}/cells"]

                grp.require_dataset(
                    f"{i}/cells",
                    dtype=prev_resolution.dtype,
                    # Data is stored in z,y,x, but the downsampling
                    # factors are in x,y,z, so need to reverse
                    # Adding 1 allows to account for dimensions of odd size,
                    # Only add 1 if the downsampling factor is greater than 1
                    shape=(
                        (
                            prev_resolution.shape[0]
                            + (downsampling_factors[2] > 1)
                        )
                        // downsampling_factors[2],
                        (
                            prev_resolution.shape[1]
                            + (downsampling_factors[1] > 1)
                        )
                        // downsampling_factors[1],
                        (
                            prev_resolution.shape[2]
                            + (downsampling_factors[0] > 1)
                        )
                        // downsampling_factors[0],
                    ),
                )
                grp[f"{i}/cells"][...] = prev_resolution[
                    :: downsampling_factors[2],
                    :: downsampling_factors[1],
                    :: downsampling_factors[0],
                ]

            num_done += 1

            if yield_progress:
                yield int(100 * num_done / num_slices)


def parse_mesospim_metadata(
meta_file_name: Path,
) -> List[Dict]:
"""
Parse the metadata from a mesoSPIM .h5_meta.txt file.
Parameters
----------
meta_file_name: Path
The path to the h5_meta.txt file.
Returns
-------
List[Dict]
A list of dictionaries containing the metadata for each tile.
"""
tile_metadata = []
with open(meta_file_name, "r") as f:
lines = f.readlines()
curr_tile_metadata: dict[str, str | int | float] = {}
curr_tile_metadata: Dict[str, Union[str, int, float]] = {}

for line in lines[3:]:
line = line.strip()
# Tile metadata is separated by a line starting with [CFG]
if line.startswith("[CFG"):
tile_metadata.append(curr_tile_metadata)
curr_tile_metadata = {}
# Skip the headers
elif line in HEADERS:
continue
# Skip empty lines
elif not line:
continue
else:
split_line = line.split("]")
value = split_line[1].strip()
# Check if the value is an int or a float
# If it is neither, store it as a string
if value.isdigit():
curr_tile_metadata[split_line[0][1:]] = int(value)
else:
Expand All @@ -82,3 +153,76 @@ def parse_mesospim_metadata(meta_file_name: Path):

tile_metadata.append(curr_tile_metadata)
return tile_metadata


def check_mesospim_directory(
    mesospim_directory: Path,
) -> Tuple[Path, Path, Path]:
    """
    Verify that a mesoSPIM directory contains exactly one bdv.xml file,
    one h5_meta.txt file, and one bdv.h5 file, and return their paths.

    Parameters
    ----------
    mesospim_directory: Path
        The path to the mesoSPIM directory.

    Returns
    -------
    Tuple[Path, Path, Path]
        The paths to the bdv.xml, h5_meta.txt, and bdv.h5 files.

    Raises
    ------
    FileNotFoundError
        If any of the three expected files is missing, or if more than one
        candidate is found for any of them.
    """
    # The "[!.]*" glob prefix skips hidden files (leading period) while
    # requiring the expected file-name suffix.
    found_paths = []
    for pattern, label in (
        ("[!.]*bdv.xml", "bdv.xml"),
        ("[!.]*h5_meta.txt", "h5_meta.txt"),
        ("[!.]*bdv.h5", "h5"),
    ):
        candidates = list(mesospim_directory.glob(pattern))
        # Exactly one match is required for an unambiguous dataset.
        if len(candidates) != 1:
            raise FileNotFoundError(
                f"Expected 1 {label} file, found {len(candidates)}"
            )
        found_paths.append(candidates[0])

    return found_paths[0], found_paths[1], found_paths[2]


def get_slice_attributes(
    xml_path: Path, tile_names: List[str]
) -> Dict[str, Dict]:
    """
    Get the slice attributes from a Big Data Viewer XML file. Attributes
    include the illumination id, channel id, tile id, and angle id.

    Parameters
    ----------
    xml_path: Path
        The path to the XML file.
    tile_names: List[str]
        The names of the tiles.

    Returns
    -------
    Dict[str, Dict]
        A dictionary containing the slice attributes for each tile, keyed
        by tile name; values map attribute tag to attribute text.
    """
    root = ET.parse(xml_path).getroot()
    # One <attributes> element per <ViewSetup>, in document order; pair
    # them with the supplied tile names positionally.
    setups = root.findall(".//ViewSetup//attributes")

    return {
        name: {field.tag: field.text for field in setup}
        for setup, name in zip(setups, tile_names)
    }
Loading

0 comments on commit 68f0d78

Please sign in to comment.