Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

correct shuffling of annotations in nhood enrichment #775

Merged
merged 19 commits into from
Feb 5, 2024
Merged
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ jobs:
strategy:
fail-fast: false
matrix:
python: [3.9, "3.10"]
python: [3.9, "3.10", "3.11"]
os: [ubuntu-latest]
include:
- python: 3.9
os: macos-latest
- python: "3.10"
os: macos-latest
os: macos-14
env:
OS: ${{ matrix.os }}
PYTHON: ${{ matrix.python }}
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ repos:
- id: blacken-docs
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.1.14
rev: v0.2.0
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
Expand Down
29 changes: 29 additions & 0 deletions docs/release/notes-1.4.0.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
Squidpy 1.4.0 (2024-02-05)
==========================

Bugfixes
--------

- Fix building graph in ``knn`` and ``delaunay`` mode.
`@michalk8 <https://github.com/michalk8>`__
`#792 <https://github.com/scverse/squidpy/pull/792>`__

- Correct shuffling of annotations in ``sq.gr.nhood_enrichment``.
`@giovp <https://github.com/giovp>`__
`#775 <https://github.com/scverse/squidpy/pull/775>`__


Miscellaneous
-------------

- Fix napari installation.
`@giovp <https://github.com/giovp>`__
`#767 <https://github.com/scverse/squidpy/pull/767>`__

- Made nanostring reader more flexible by adjusting loading of images.
`@FrancescaDr <https://github.com/FrancescaDr>`__
`#766 <https://github.com/scverse/squidpy/pull/766>`__

- Fix ``sq.tl.var_by_distance`` method to support ``pandas 2.2.0``.
`@LLehner <https://github.com/LLehner>`__
`#794 <https://github.com/scverse/squidpy/pull/794>`__
18 changes: 16 additions & 2 deletions src/squidpy/gr/_nhood.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import pandas as pd
from anndata import AnnData
from numba import njit, prange # noqa: F401
from pandas import CategoricalDtype
from scanpy import logging as logg
from spatialdata import SpatialData

Expand All @@ -29,6 +30,7 @@
_assert_connectivity_key,
_assert_positive,
_save_data,
_shuffle_group,
)

__all__ = ["nhood_enrichment", "centrality_scores", "interaction_matrix"]
Expand Down Expand Up @@ -125,6 +127,7 @@ def _create_function(n_cls: int, parallel: bool = False) -> Callable[[NDArrayA,
def nhood_enrichment(
adata: AnnData | SpatialData,
cluster_key: str,
library_key: str | None = None,
connectivity_key: str | None = None,
n_perms: int = 1000,
numba_parallel: bool = False,
Expand All @@ -141,6 +144,7 @@ def nhood_enrichment(
----------
%(adata)s
%(cluster_key)s
%(library_key)s
%(conn_key)s
%(n_perms)s
%(numba_parallel)s
Expand Down Expand Up @@ -169,6 +173,12 @@ def nhood_enrichment(
clust_map = {v: i for i, v in enumerate(original_clust.cat.categories.values)} # map categories
int_clust = np.array([clust_map[c] for c in original_clust], dtype=ndt)

if library_key is not None:
_assert_categorical_obs(adata, key=library_key)
libraries: pd.Series | None = adata.obs[library_key]
else:
libraries = None

indices, indptr = (adj.indices.astype(ndt), adj.indptr.astype(ndt))
n_cls = len(clust_map)

Expand All @@ -185,7 +195,7 @@ def nhood_enrichment(
n_jobs=n_jobs,
backend=backend,
show_progress_bar=show_progress_bar,
)(callback=_test, indices=indices, indptr=indptr, int_clust=int_clust, n_cls=n_cls, seed=seed)
)(callback=_test, indices=indices, indptr=indptr, int_clust=int_clust, libraries=libraries, n_cls=n_cls, seed=seed)
zscore = (count - perms.mean(axis=0)) / perms.std(axis=0)

if copy:
Expand Down Expand Up @@ -397,6 +407,7 @@ def _nhood_enrichment_helper(
indices: NDArrayA,
indptr: NDArrayA,
int_clust: NDArrayA,
libraries: pd.Series[CategoricalDtype] | None,
n_cls: int,
seed: int | None = None,
queue: SigQueue | None = None,
Expand All @@ -406,7 +417,10 @@ def _nhood_enrichment_helper(
rs = np.random.RandomState(seed=None if seed is None else seed + ixs[0])

for i in range(len(ixs)):
rs.shuffle(int_clust)
if libraries is not None:
int_clust = _shuffle_group(int_clust, libraries, rs)
else:
rs.shuffle(int_clust)
perms[i, ...] = callback(indices, indptr, int_clust)

if queue is not None:
Expand Down
34 changes: 33 additions & 1 deletion src/squidpy/gr/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from anndata import AnnData
from anndata._core.views import ArrayView, SparseCSCView, SparseCSRView
from anndata.utils import make_index_unique
from pandas import CategoricalDtype
from pandas.api.types import infer_dtype, is_categorical_dtype
from scanpy import logging as logg
from scipy.sparse import csc_matrix, csr_matrix, issparse, spmatrix
Expand Down Expand Up @@ -139,7 +140,7 @@ def _assert_categorical_obs(adata: AnnData, key: str) -> None:
if key not in adata.obs:
raise KeyError(f"Cluster key `{key}` not found in `adata.obs`.")

if not is_categorical_dtype(adata.obs[key]):
if not isinstance(adata.obs[key].dtype, CategoricalDtype):
raise TypeError(f"Expected `adata.obs[{key!r}]` to be `categorical`, found `{infer_dtype(adata.obs[key])}`.")


Expand Down Expand Up @@ -300,3 +301,34 @@ def key_present() -> bool:
# in principle we assume the callee doesn't change the index
# otherwise, would need to check whether it has been changed and add an option to determine what to do
adata.var.index = var_names


def _shuffle_group(
    cluster_annotation: NDArrayA,
    libraries: pd.Series[CategoricalDtype],
    rs: np.random.RandomState | np.random.Generator,
) -> NDArrayA:
    """
    Shuffle values in ``cluster_annotation`` separately within each category of ``libraries``.

    Useful when the shuffling is used in permutation tests where annotations must stay inside their group
    (e.g. you only want to shuffle cluster annotations for the same slide/library_key, and not across slides).

    Parameters
    ----------
    cluster_annotation
        Array to shuffle. It is not modified.
    libraries
        Categorical series (e.g. libraries) of the same length as ``cluster_annotation`` defining the groups
        within which values are shuffled. Entries with a missing library keep their original annotation.
    rs
        Random state used for shuffling; any object exposing an in-place ``shuffle`` method works.

    Returns
    -------
    Shuffled annotations, with the same shape and dtype as ``cluster_annotation``.

    Raises
    ------
    ValueError
        If ``cluster_annotation`` and ``libraries`` differ in length.
    """
    cluster_annotation = np.asarray(cluster_annotation)
    if cluster_annotation.shape[0] != libraries.shape[0]:
        raise ValueError(
            f"Expected `cluster_annotation` and `libraries` to have the same length, "
            f"found `{cluster_annotation.shape[0]}` and `{libraries.shape[0]}`."
        )

    # start from a copy so that observations without a library (code -1) keep a defined value
    # instead of whatever happens to be in freshly allocated memory
    shuffled = cluster_annotation.copy()
    codes = libraries.cat.codes.to_numpy()

    # the number of libraries is expected to be small, hence a plain loop (no need to vectorize)
    for code in range(len(libraries.cat.categories)):
        idx = np.flatnonzero(codes == code)
        group = cluster_annotation[idx]  # integer-array indexing already yields a copy
        rs.shuffle(group)  # in-place on the copy, the input stays untouched
        shuffled[idx] = group
    return shuffled
3 changes: 2 additions & 1 deletion src/squidpy/pl/_interactive/_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from anndata import AnnData
from napari import Viewer
from napari.layers import Points, Shapes
from pandas import CategoricalDtype
from pandas.core.dtypes.common import is_categorical_dtype
from PyQt5.QtWidgets import QGridLayout, QLabel, QWidget
from scanpy import logging as logg
Expand Down Expand Up @@ -181,7 +182,7 @@ def add_points(self, vec: NDArrayA | pd.Series, layer_name: str, key: str | None
**properties,
)
# TODO(michalk8): add contrasting fg/bg color once https://github.com/napari/napari/issues/2019 is done
self._hide_points_controls(layer, is_categorical=is_categorical_dtype(vec))
self._hide_points_controls(layer, is_categorical=isinstance(vec.dtype, CategoricalDtype))
layer.editable = False

return True
Expand Down
5 changes: 3 additions & 2 deletions src/squidpy/pl/_interactive/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from anndata import AnnData
from matplotlib.colors import to_hex, to_rgb
from numba import njit
from pandas import CategoricalDtype
from pandas._libs.lib import infer_dtype
from pandas.core.dtypes.common import is_categorical_dtype
from scanpy import logging as logg
Expand All @@ -23,7 +24,7 @@ def _get_categorical(
vec: pd.Series | None = None,
) -> NDArrayA:
if vec is not None:
if not is_categorical_dtype(vec):
if not isinstance(vec.dtype, CategoricalDtype):
raise TypeError(f"Expected a `categorical` type, found `{infer_dtype(vec)}`.")
if key in adata.obs:
logg.debug(f"Overwriting `adata.obs[{key!r}]`")
Expand All @@ -39,7 +40,7 @@ def _get_categorical(


def _position_cluster_labels(coords: NDArrayA, clusters: pd.Series, colors: NDArrayA) -> dict[str, NDArrayA]:
if not is_categorical_dtype(clusters):
if not isinstance(clusters.dtype, CategoricalDtype):
raise TypeError(f"Expected `clusters` to be `categorical`, found `{infer_dtype(clusters)}`.")

coords = coords[:, 1:] # TODO(michalk8): account for current Z-dim?
Expand Down
8 changes: 4 additions & 4 deletions src/squidpy/pl/_spatial_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from matplotlib.gridspec import GridSpec
from matplotlib.patches import Circle, Polygon, Rectangle
from matplotlib_scalebar.scalebar import ScaleBar
from pandas.api.types import CategoricalDtype
from pandas import CategoricalDtype
from pandas.core.dtypes.common import is_categorical_dtype
from scanpy import logging as logg
from scanpy._settings import settings as sc_settings
Expand Down Expand Up @@ -694,8 +694,8 @@ def _map_color_seg(
if is_categorical_dtype(color_vector):
if isinstance(na_color, tuple) and len(na_color) == 4 and np.any(color_source_vector.isna()):
cell_id[color_source_vector.isna()] = 0
val_im: NDArrayA = map_array(seg, cell_id, color_vector.codes + 1) # type: ignore
cols = colors.to_rgba_array(color_vector.categories) # type: ignore
val_im: NDArrayA = map_array(seg, cell_id, color_vector.codes + 1) # type: ignore[union-attr]
cols = colors.to_rgba_array(color_vector.categories) # type: ignore[union-attr]
else:
val_im = map_array(seg, cell_id, cell_id) # replace with same seg id to remove missing segs
try:
Expand Down Expand Up @@ -744,7 +744,7 @@ def _prepare_args_plot(

# set palette if missing
for c in color:
if c is not None and c in adata.obs and is_categorical_dtype(adata.obs[c]):
if c is not None and c in adata.obs and isinstance(adata.obs[c].dtype, CategoricalDtype):
_maybe_set_colors(source=adata, target=adata, key=c, palette=palette)

# check raw
Expand Down
3 changes: 2 additions & 1 deletion src/squidpy/pl/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from matplotlib.figure import Figure
from mpl_toolkits.axes_grid1 import make_axes_locatable
from numba import njit, prange
from pandas import CategoricalDtype
from pandas._libs.lib import infer_dtype
from pandas.core.dtypes.common import (
is_bool_dtype,
Expand Down Expand Up @@ -232,7 +233,7 @@ def decorator(self: ALayer, *args: Any, **kwargs: Any) -> Vector_name_t:
return None, None

if isinstance(res, pd.Series):
if is_categorical_dtype(res):
if isinstance(res, CategoricalDtype):
return res, fmt
if is_string_dtype(res) or is_object_dtype(res) or is_bool_dtype(res):
return res.astype("category"), fmt
Expand Down
27 changes: 27 additions & 0 deletions tests/graph/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import numpy as np
import pandas as pd
import pytest
from anndata import AnnData
from squidpy._constants._pkg_constants import Key
from squidpy.gr._utils import _shuffle_group


class TestUtils:
    """Tests for the private helpers in ``squidpy.gr._utils``."""

    @pytest.mark.parametrize("cluster_annotations_type", [int, str])
    @pytest.mark.parametrize("library_annotations_type", [int, str])
    @pytest.mark.parametrize("seed", [422, 422222])
    def test_shuffle_group(self, cluster_annotations_type: type, library_annotations_type: type, seed: int):
        """Shuffling must only permute the annotations within each library."""
        size = 6
        # ``_shuffle_group`` is called with a ``RandomState`` in the library, so test with the same type
        rs = np.random.RandomState(seed)

        if library_annotations_type is int:
            libraries = pd.Series(rs.choice([1, 2, 3, 4], size=(size,)), dtype="category")
        else:
            libraries = pd.Series(rs.choice(["a", "b", "c"], size=(size,)), dtype="category")

        if cluster_annotations_type is int:
            cluster_annotations = rs.choice([1, 2, 3, 4], size=(size,))
        else:
            cluster_annotations = rs.choice(["X", "Y", "Z"], size=(size,))
        original = cluster_annotations.copy()

        out = _shuffle_group(cluster_annotations, libraries, rs)

        assert out.shape == cluster_annotations.shape
        assert out.dtype == cluster_annotations.dtype
        # the input array must not be shuffled in place
        np.testing.assert_array_equal(cluster_annotations, original)
        for c in libraries.cat.categories:
            mask = (libraries == c).to_numpy()
            # compare as sorted lists: a set would hide duplicated or dropped annotations
            assert sorted(out[mask].tolist()) == sorted(cluster_annotations[mask].tolist())
Loading