From 21d191d01793a95c7beeab89b192d02b96a70114 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Tue, 7 Jan 2025 16:17:33 +0100 Subject: [PATCH] Deprecate positional boolean arguments (#1694) --- benchmarks/benchmarks/anndata.py | 10 +- docs/conf.py | 3 +- docs/extensions/no_skip_abc_members.py | 2 +- pyproject.toml | 3 + src/anndata/_core/anndata.py | 51 ++++--- src/anndata/_core/file_backing.py | 13 +- src/anndata/_core/merge.py | 2 +- src/anndata/_io/specs/methods.py | 2 +- src/anndata/_io/write.py | 7 +- src/anndata/compat/__init__.py | 14 +- .../multi_files/_anncollection.py | 30 ++++- .../experimental/pytorch/_annloader.py | 47 +++++-- src/anndata/logging.py | 18 ++- src/anndata/tests/helpers.py | 126 +++++++++++++----- src/anndata/utils.py | 1 + tests/test_backed_sparse.py | 6 +- tests/test_concatenate.py | 4 +- tests/test_concatenate_disk.py | 1 + tests/test_io_elementwise.py | 2 +- tests/test_io_utils.py | 2 +- tests/test_readwrite.py | 7 +- tests/test_settings.py | 2 +- tests/test_views.py | 2 +- 23 files changed, 251 insertions(+), 104 deletions(-) diff --git a/benchmarks/benchmarks/anndata.py b/benchmarks/benchmarks/anndata.py index 15fb485a2..a6a036f24 100644 --- a/benchmarks/benchmarks/anndata.py +++ b/benchmarks/benchmarks/anndata.py @@ -15,8 +15,14 @@ def track_peakmem_garbage_collection(self, *_): def display_top(snapshot, key_type="lineno"): snapshot = snapshot.filter_traces( ( - tracemalloc.Filter(False, ""), - tracemalloc.Filter(False, ""), + tracemalloc.Filter( + inclusive=False, + filename_pattern="", + ), + tracemalloc.Filter( + inclusive=False, + filename_pattern="", + ), ) ) top_stats = snapshot.statistics(key_type) diff --git a/docs/conf.py b/docs/conf.py index f98fe5ba7..7c2807554 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -73,9 +73,10 @@ # Generate the API documentation when building autosummary_generate = True autodoc_member_order = "bysource" +autodoc_mock_imports = ["torch"] +# autodoc_default_flags = ['members'] issues_github_path = "scverse/anndata" rtd_links_prefix = PurePosixPath("src") -# autodoc_default_flags = ['members'] napoleon_google_docstring = False napoleon_numpy_docstring = True napoleon_include_init_with_doc = False diff --git a/docs/extensions/no_skip_abc_members.py b/docs/extensions/no_skip_abc_members.py index 66846e095..484232f93 100644 --- a/docs/extensions/no_skip_abc_members.py +++ b/docs/extensions/no_skip_abc_members.py @@ -16,7 +16,7 @@ def autodoc_skip_member( what: Literal["module", "class", "exception", "function", "method", "attribute"], name: str, obj: object, - skip: bool, + skip: bool, # noqa: FBT001 options: Options, ): if what == "method" and getattr(obj, "__isabstractmethod__", False): diff --git a/pyproject.toml b/pyproject.toml index 66e2815bc..db57ccdaa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ dependencies = [ "packaging>=20.0", # array-api-compat 1.5 has https://github.com/scverse/anndata/issues/1410 "array_api_compat>1.4,!=1.5", + "legacy-api-wrap", ] dynamic = ["version"] @@ -172,6 +173,7 @@ docstring-code-format = true select = [ "E", # Error detected by Pycodestyle "F", # Errors detected by Pyflakes + "FBT", # Boolean positional arguments "W", # Warning detected by Pycodestyle "PLW", # Pylint "UP", # pyupgrade @@ -205,6 +207,7 @@ required-imports = ["from __future__ import annotations"] "subprocess.call".msg = "Use `subprocess.run([…])` instead" "subprocess.check_call".msg = "Use `subprocess.run([…], check=True)` instead" "subprocess.check_output".msg = "Use `subprocess.run([…], check=True, capture_output=True)` instead" +"legacy_api_wrap.legacy_api".msg = "Use anndata.compat.old_positionals instead" [tool.ruff.lint.flake8-type-checking] exempt-modules = [] strict = true diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index 8a8eaf949..5651940ab 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -24,7 +24,7 @@ from .. import utils from .._settings import settings -from ..compat import DaskArray, SpArray, ZarrArray, _move_adj_mtx +from ..compat import DaskArray, SpArray, ZarrArray, _move_adj_mtx, old_positionals from ..logging import anndata_logger as logger from ..utils import ( axis_len, @@ -219,12 +219,24 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): var={"var_names", "col_names", "index"}, ) + @old_positionals( + "obsm", + "varm", + "layers", + "raw", + "dtype", + "shape", + "filename", + "filemode", + "asview", + ) def __init__( self, X: np.ndarray | sparse.spmatrix | pd.DataFrame | None = None, obs: pd.DataFrame | Mapping[str, Iterable[Any]] | None = None, var: pd.DataFrame | Mapping[str, Iterable[Any]] | None = None, uns: Mapping[str, Any] | None = None, + *, obsm: np.ndarray | Mapping[str, Sequence[Any]] | None = None, varm: np.ndarray | Mapping[str, Sequence[Any]] | None = None, layers: Mapping[str, np.ndarray | sparse.spmatrix] | None = None, @@ -234,7 +246,6 @@ def __init__( filename: PathLike | None = None, filemode: Literal["r", "r+"] | None = None, asview: bool = False, - *, obsp: np.ndarray | Mapping[str, Sequence[Any]] | None = None, varp: np.ndarray | Mapping[str, Sequence[Any]] | None = None, oidx: Index1D | None = None, @@ -470,7 +481,10 @@ def _init_as_actual( # layers self.layers = layers - def __sizeof__(self, show_stratified=None, with_disk: bool = False) -> int: + @old_positionals("show_stratified", "with_disk") + def __sizeof__( + self, *, show_stratified: bool = False, with_disk: bool = False + ) -> int: def get_size(X) -> int: def cs_to_bytes(X) -> int: return int(X.data.nbytes + X.indptr.nbytes + X.indices.nbytes) @@ -1247,7 +1261,7 @@ def to_df(self, layer: str | None = None) -> pd.DataFrame: X = X.toarray() return pd.DataFrame(X, index=self.obs_names, columns=self.var_names) - def _get_X(self, use_raw=False, layer=None): + def _get_X(self, *, use_raw: bool = False, layer: str | None = None): """\ Convenience method for getting expression values with common arguments and error handling. @@ -1331,8 +1345,8 @@ def var_vector(self, k, *, layer: str | None = None) -> np.ndarray: layer = None return get_vector(self, k, "var", "obs", layer=layer) - @utils.deprecated("obs_vector") - def _get_obs_array(self, k, use_raw=False, layer=None): + @deprecated("obs_vector") + def _get_obs_array(self, k, use_raw=False, layer=None): # noqa: FBT002 """\ Get an array from the layer (default layer='X') along the :attr:`obs` dimension by first looking up `obs.keys` and then :attr:`obs_names`. @@ -1342,8 +1356,8 @@ def _get_obs_array(self, k, use_raw=False, layer=None): else: return self.raw.obs_vector(k) - @utils.deprecated("var_vector") - def _get_var_array(self, k, use_raw=False, layer=None): + @deprecated("var_vector") + def _get_var_array(self, k, use_raw=False, layer=None): # noqa: FBT002 """\ Get an array from the layer (default layer='X') along the :attr:`var` dimension by first looking up `var.keys` and then :attr:`var_names`. @@ -1382,7 +1396,8 @@ def _mutated_copy(self, **kwargs): new["raw"] = self.raw.copy() return AnnData(**new) - def to_memory(self, copy=False) -> AnnData: + @old_positionals("copy") + def to_memory(self, *, copy: bool = False) -> AnnData: """Return a new AnnData object with all backed arrays loaded into memory. Params @@ -1413,13 +1428,13 @@ def to_memory(self, copy=False) -> AnnData: ]: attr = getattr(self, attr_name, None) if attr is not None: - new[attr_name] = to_memory(attr, copy) + new[attr_name] = to_memory(attr, copy=copy) if self.raw is not None: new["raw"] = { - "X": to_memory(self.raw.X, copy), - "var": to_memory(self.raw.var, copy), - "varm": to_memory(self.raw.varm, copy), + "X": to_memory(self.raw.X, copy=copy), + "var": to_memory(self.raw.var, copy=copy), + "varm": to_memory(self.raw.varm, copy=copy), } if self.isbacked: @@ -1875,7 +1890,8 @@ def write_h5ad( write = write_h5ad # a shortcut and backwards compat - def write_csvs(self, dirname: PathLike, skip_data: bool = True, sep: str = ","): + @old_positionals("skip_data", "sep") + def write_csvs(self, dirname: PathLike, *, skip_data: bool = True, sep: str = ","): """\ Write annotation to `.csv` files. @@ -1895,7 +1911,8 @@ def write_csvs(self, dirname: PathLike, skip_data: bool = True, sep: str = ","): write_csvs(dirname, self, skip_data=skip_data, sep=sep) - def write_loom(self, filename: PathLike, write_obsm_varm: bool = False): + @old_positionals("write_obsm_varm") + def write_loom(self, filename: PathLike, *, write_obsm_varm: bool = False): """\ Write `.loom`-formatted hdf5 file. @@ -1948,9 +1965,11 @@ def chunked_X(self, chunk_size: int | None = None): if start < n: yield (self.X[start:n], start, n) + @old_positionals("replace") def chunk_X( self, select: int | Sequence[int] | np.ndarray = 1000, + *, replace: bool = True, ): """\ @@ -2008,7 +2027,7 @@ def _has_X(self) -> bool: # -------------------------------------------------------------------------- @property - @utils.deprecated("is_view") + @deprecated("is_view") def isview(self): return self.is_view diff --git a/src/anndata/_core/file_backing.py b/src/anndata/_core/file_backing.py index 6e8c5a558..5fce9a5d3 100644 --- a/src/anndata/_core/file_backing.py +++ b/src/anndata/_core/file_backing.py @@ -16,6 +16,7 @@ from os import PathLike from typing import Literal + from .._types import ArrayStorageType from . import anndata @@ -118,7 +119,7 @@ def is_open(self) -> bool: @singledispatch -def to_memory(x, copy=False): +def to_memory(x, *, copy: bool = False): """Permissivley convert objects to in-memory representation. If they already are in-memory, (or are just unrecognized) pass a copy through. @@ -131,27 +132,27 @@ def to_memory(x, copy=False): @to_memory.register(ZarrArray) @to_memory.register(h5py.Dataset) -def _(x, copy=False): +def _(x: ArrayStorageType, *, copy: bool = False): return x[...] @to_memory.register(BaseCompressedSparseDataset) -def _(x: BaseCompressedSparseDataset, copy=True): +def _(x: BaseCompressedSparseDataset, *, copy: bool = False): return x.to_memory() @to_memory.register(DaskArray) -def _(x, copy=False): +def _(x: DaskArray, *, copy: bool = False): return x.compute() @to_memory.register(Mapping) -def _(x: Mapping, copy=False): +def _(x: Mapping, *, copy: bool = False): return {k: to_memory(v, copy=copy) for k, v in x.items()} @to_memory.register(AwkArray) -def _(x, copy=False): +def _(x: AwkArray, *, copy: bool = False): from copy import copy as _copy if copy: diff --git a/src/anndata/_core/merge.py b/src/anndata/_core/merge.py index 77672fdda..9fda2f74a 100644 --- a/src/anndata/_core/merge.py +++ b/src/anndata/_core/merge.py @@ -205,7 +205,7 @@ def equal_awkward(a, b) -> bool: return ak.almost_equal(a, b) -def as_sparse(x, use_sparse_array=False): +def as_sparse(x, *, use_sparse_array: bool = False): if not isinstance(x, sparse.spmatrix | SpArray): if CAN_USE_SPARSE_ARRAY and use_sparse_array: return sparse.csr_array(x) diff --git a/src/anndata/_io/specs/methods.py b/src/anndata/_io/specs/methods.py index 0d5631a0c..181b525c8 100644 --- a/src/anndata/_io/specs/methods.py +++ b/src/anndata/_io/specs/methods.py @@ -943,7 +943,7 @@ def read_series(dataset: h5py.Dataset) -> np.ndarray | pd.Categorical: parent = dataset.parent categories_dset = parent[_read_attr(dataset.attrs, "categories")] categories = read_elem(categories_dset) - ordered = bool(_read_attr(categories_dset.attrs, "ordered", False)) + ordered = bool(_read_attr(categories_dset.attrs, "ordered", default=False)) return pd.Categorical.from_codes( read_elem(dataset), categories, ordered=ordered ) diff --git a/src/anndata/_io/write.py b/src/anndata/_io/write.py index fb5dd6699..a2b5ed569 100644 --- a/src/anndata/_io/write.py +++ b/src/anndata/_io/write.py @@ -11,6 +11,7 @@ from scipy.sparse import issparse from .._warnings import WriteWarning +from ..compat import old_positionals from ..logging import get_logger if TYPE_CHECKING: @@ -21,8 +22,9 @@ logger = get_logger(__name__) +@old_positionals("skip_data", "sep") def write_csvs( - dirname: PathLike, adata: AnnData, skip_data: bool = True, sep: str = "," + dirname: PathLike, adata: AnnData, *, skip_data: bool = True, sep: str = "," ): """See :meth:`~anndata.AnnData.write_csvs`.""" dirname = Path(dirname) @@ -75,7 +77,8 @@ def write_csvs( ) -def write_loom(filename: PathLike, adata: AnnData, write_obsm_varm: bool = False): +@old_positionals("write_obsm_varm") +def write_loom(filename: PathLike, adata: AnnData, *, write_obsm_varm: bool = False): filename = Path(filename) row_attrs = {k: np.array(v) for k, v in adata.var.to_dict("list").items()} row_names = adata.var_names diff --git a/src/anndata/compat/__init__.py b/src/anndata/compat/__init__.py index 981e5a04b..fa3d83821 100644 --- a/src/anndata/compat/__init__.py +++ b/src/anndata/compat/__init__.py @@ -6,7 +6,7 @@ from collections.abc import Mapping from contextlib import AbstractContextManager from dataclasses import dataclass, field -from functools import singledispatch, wraps +from functools import partial, singledispatch, wraps from importlib.util import find_spec from inspect import Parameter, signature from pathlib import Path @@ -185,6 +185,16 @@ def __repr__(): return "mock cupy.ndarray" +if find_spec("legacy_api_wrap") or TYPE_CHECKING: + from legacy_api_wrap import legacy_api # noqa: TID251 + + old_positionals = partial(legacy_api, category=FutureWarning) +else: + + def old_positionals(*old_positionals): + return lambda func: func + + ############################# # IO helpers ############################# @@ -251,7 +261,7 @@ def _from_fixed_length_strings(value): def _decode_structured_array( - arr: np.ndarray, dtype: np.dtype | None = None, copy: bool = False + arr: np.ndarray, *, dtype: np.dtype | None = None, copy: bool = False ) -> np.ndarray: """ h5py 3.0 now reads all strings as bytes. There is a helper method which can convert these to strings, diff --git a/src/anndata/experimental/multi_files/_anncollection.py b/src/anndata/experimental/multi_files/_anncollection.py index c5f427f6d..b8d8fea03 100644 --- a/src/anndata/experimental/multi_files/_anncollection.py +++ b/src/anndata/experimental/multi_files/_anncollection.py @@ -15,10 +15,10 @@ from ..._core.merge import concat_arrays, inner_concat_aligned_mapping from ..._core.sparse_dataset import BaseCompressedSparseDataset from ..._core.views import _resolve_idx -from ...compat import _map_cat_to_str +from ...compat import _map_cat_to_str, old_positionals if TYPE_CHECKING: - from collections.abc import Sequence + from collections.abc import Iterable, Sequence from typing import Literal from ..._core.index import Index @@ -123,9 +123,11 @@ def _resolve_idx(self, oidx, vidx): class _IterateViewMixin: + @old_positionals("axis", "shuffle", "drop_last") def iterate_axis( self, batch_size: int, + *, axis: Literal[0, 1] = 0, shuffle: bool = False, drop_last: bool = False, @@ -189,7 +191,7 @@ def __init__( self.dtypes = dtypes self.obs_names = obs_names - def __getitem__(self, key, use_convert=True): + def __getitem__(self, key: str, *, use_convert: bool = True): if self._keys is not None and key not in self._keys: raise KeyError(f"No {key} in {self.attr} view") @@ -237,11 +239,12 @@ def keys(self): else: return list(getattr(self.adatas[0], self.attr).keys()) - def to_dict(self, keys=None, use_convert=True): + @old_positionals("use_convert") + def to_dict(self, keys: Iterable[str] | None = None, *, use_convert=True): dct = {} keys = self.keys() if keys is None else keys for key in keys: - dct[key] = self.__getitem__(key, use_convert) + dct[key] = self.__getitem__(key, use_convert=use_convert) return dct @property @@ -299,7 +302,7 @@ def __init__(self, reference, convert, resolved_idx): self._convert_X = None self.convert = convert - def _lazy_init_attr(self, attr, set_vidx=False): + def _lazy_init_attr(self, attr: str, *, set_vidx: bool = False): if getattr(self, f"_{attr}_view") is not None: return keys = None @@ -544,7 +547,8 @@ def __repr__(self): descr += f"\n {attr}: {str(keys)[1:-1]}" return descr - def to_adata(self, ignore_X: bool = False, ignore_layers: bool = False): + @old_positionals("ignore_X", "ignore_layers") + def to_adata(self, *, ignore_X: bool = False, ignore_layers: bool = False): """Convert this AnnCollectionView object to an AnnData object. Parameters @@ -675,9 +679,21 @@ class AnnCollection(_ConcatViewMixin, _IterateViewMixin): 100 """ + @old_positionals( + "join_obs", + "join_obsm", + "join_vars", + "label", + "keys", + "index_unique", + "convert", + "harmonize_dtypes", + "indices_strict", + ) def __init__( self, adatas: Sequence[AnnData] | dict[str, AnnData], + *, join_obs: Literal["inner", "outer"] | None = "inner", join_obsm: Literal["inner"] | None = None, join_vars: Literal["inner"] | None = None, diff --git a/src/anndata/experimental/pytorch/_annloader.py b/src/anndata/experimental/pytorch/_annloader.py index cebbe1b5d..ddc5e825f 100644 --- a/src/anndata/experimental/pytorch/_annloader.py +++ b/src/anndata/experimental/pytorch/_annloader.py @@ -1,5 +1,6 @@ from __future__ import annotations +from collections.abc import Mapping from copy import copy from functools import partial from importlib.util import find_spec @@ -10,28 +11,44 @@ from scipy.sparse import issparse from ..._core.anndata import AnnData +from ...compat import old_positionals from ..multi_files._anncollection import AnnCollection, _ConcatViewMixin -if TYPE_CHECKING: - from collections.abc import Sequence - if find_spec("torch") or TYPE_CHECKING: import torch from torch.utils.data import BatchSampler, DataLoader, Sampler else: Sampler, BatchSampler, DataLoader = object, object, object +if TYPE_CHECKING: + from collections.abc import Callable, Generator, Sequence + from typing import TypeAlias, Union + + from scipy.sparse import spmatrix + + # need to use Union because of autodoc_mock_imports + Array: TypeAlias = Union[torch.Tensor, np.ndarray, spmatrix] # noqa: UP007 + # Custom sampler to get proper batches instead of joined separate indices # maybe move to multi_files class BatchIndexSampler(Sampler): - def __init__(self, n_obs, batch_size, shuffle=False, drop_last=False): + @old_positionals("batch_size", "shuffle", "drop_last") + def __init__( + self, + n_obs: int, + *, + batch_size: int, + shuffle: bool = False, + drop_last: bool = False, + ) -> None: self.n_obs = n_obs self.batch_size = batch_size if batch_size < n_obs else n_obs self.shuffle = shuffle self.drop_last = drop_last - def __iter__(self): + def __iter__(self) -> Generator[list[int], None, None]: + indices: list[int] if self.shuffle: indices = np.random.permutation(self.n_obs).tolist() else: @@ -46,7 +63,7 @@ def __iter__(self): yield batch - def __len__(self): + def __len__(self) -> int: if self.drop_last: length = self.n_obs // self.batch_size else: @@ -56,7 +73,7 @@ def __len__(self): # maybe replace use_cuda with explicit device option -def default_converter(arr, use_cuda, pin_memory): +def default_converter(arr: Array, *, use_cuda: bool, pin_memory: bool): if isinstance(arr, torch.Tensor): if use_cuda: arr = arr.cuda() @@ -73,7 +90,11 @@ def default_converter(arr, use_cuda, pin_memory): return arr -def _convert_on_top(convert, top_convert, attrs_keys): +def _convert_on_top( + convert: Callable[[Array], Array] | None | Mapping[str, Callable[[Array], Array]], + top_convert: Callable[[Array], Array], + attrs_keys: Sequence[str] | Mapping[str, Sequence[str]], +): if convert is None: new_convert = top_convert elif callable(convert): @@ -88,7 +109,8 @@ def compose_convert(arr): if attr not in convert: new_convert[attr] = top_convert else: - if isinstance(attrs_keys, list): + as_ks: Sequence[str] | None + if not isinstance(attrs_keys, Mapping): as_ks = None else: as_ks = attrs_keys[attr] @@ -126,9 +148,11 @@ class AnnLoader(DataLoader): arguments for `AnnCollection` initialization. """ + @old_positionals("batch_size", "shuffle", "use_default_converter", "use_cuda") def __init__( self, adatas: Sequence[AnnData] | dict[str, AnnData], + *, batch_size: int = 1, shuffle: bool = False, use_default_converter: bool = True, @@ -202,7 +226,10 @@ def __init__( ) else: sampler = BatchIndexSampler( - len(dataset), batch_size, shuffle, drop_last + len(dataset), + batch_size=batch_size, + shuffle=shuffle, + drop_last=drop_last, ) super().__init__(dataset, batch_size=None, sampler=sampler, **kwargs) diff --git a/src/anndata/logging.py b/src/anndata/logging.py index 1a0f2e11d..d7849fd8e 100644 --- a/src/anndata/logging.py +++ b/src/anndata/logging.py @@ -3,6 +3,8 @@ import logging import os +from .compat import old_positionals + _previous_memory_usage = None anndata_logger = logging.getLogger("anndata") @@ -13,7 +15,7 @@ anndata_logger.handlers[-1].setLevel("INFO") -def get_logger(name): +def get_logger(name: str) -> logging.Logger: """\ Creates a child logger that delegates to anndata_logger instead to logging.root @@ -21,7 +23,7 @@ def get_logger(name): return anndata_logger.manager.getLogger(name) -def get_memory_usage(): +def get_memory_usage() -> tuple[float, float]: import psutil process = psutil.Process(os.getpid()) @@ -38,15 +40,19 @@ def get_memory_usage(): return mem, mem_diff -def format_memory_usage(mem_usage, msg="", newline=False): - newline = "\n" if newline else "" +@old_positionals("newline") +def format_memory_usage( + mem_usage: tuple[float, float], msg: str = "", *, newline: bool = False +): + nl = "\n" if newline else "" more = " \n... " if msg != "" else "" mem, diff = mem_usage return ( - f"{newline}{msg}{more}" + f"{nl}{msg}{more}" f"Memory usage: current {mem:.2f} GB, difference {diff:+.2f} GB" ) -def print_memory_usage(msg="", newline=False): +@old_positionals("newline") +def print_memory_usage(msg: str = "", *, newline: bool = False): print(format_memory_usage(get_memory_usage(), msg, newline)) diff --git a/src/anndata/tests/helpers.py b/src/anndata/tests/helpers.py index 6ed637ed8..a915ff115 100644 --- a/src/anndata/tests/helpers.py +++ b/src/anndata/tests/helpers.py @@ -40,6 +40,8 @@ from collections.abc import Callable, Collection, Iterable from typing import Literal, TypeGuard, TypeVar + from .._types import ArrayStorageType + DT = TypeVar("DT") @@ -524,7 +526,7 @@ def subset_func(request): ################### -def format_msg(elem_name): +def format_msg(elem_name: str | None) -> str: if elem_name is not None: return f"Error raised from element {elem_name!r}." else: @@ -536,7 +538,7 @@ def report_name(func): """Report name of element being tested if test fails.""" @wraps(func) - def func_wrapper(*args, _elem_name=None, **kwargs): + def func_wrapper(*args, _elem_name: str | None = None, **kwargs): try: return func(*args, **kwargs) except Exception as e: @@ -561,17 +563,23 @@ def _assert_equal(a, b): @singledispatch -def assert_equal(a, b, exact=False, elem_name=None): +def assert_equal( + a: object, b: object, *, exact: bool = False, elem_name: str | None = None +): _assert_equal(a, b, _elem_name=elem_name) @assert_equal.register(CupyArray) -def assert_equal_cupy(a, b, exact=False, elem_name=None): - assert_equal(b, a.get(), exact, elem_name) +def assert_equal_cupy( + a: CupyArray, b: object, *, exact: bool = False, elem_name: str | None = None +): + assert_equal(b, a.get(), exact=exact, elem_name=elem_name) @assert_equal.register(np.ndarray) -def assert_equal_ndarray(a, b, exact=False, elem_name=None): +def assert_equal_ndarray( + a: np.ndarray, b: object, *, exact: bool = False, elem_name: str | None = None +): b = asarray(b) if not exact and is_numeric_dtype(a) and is_numeric_dtype(b): assert a.shape == b.shape, format_msg(elem_name) @@ -586,51 +594,72 @@ def assert_equal_ndarray(a, b, exact=False, elem_name=None): # Reshaping to allow >2d arrays assert a.shape == b.shape, format_msg(elem_name) assert_equal( - pd.DataFrame(a.reshape(-1)), pd.DataFrame(b.reshape(-1)), exact, elem_name + pd.DataFrame(a.reshape(-1)), + pd.DataFrame(b.reshape(-1)), + exact=exact, + elem_name=elem_name, ) else: assert np.all(a == b), format_msg(elem_name) @assert_equal.register(ArrayView) -def assert_equal_arrayview(a, b, exact=False, elem_name=None): +def assert_equal_arrayview( + a: ArrayView, b: object, *, exact: bool = False, elem_name: str | None = None +): assert_equal(asarray(a), asarray(b), exact=exact, elem_name=elem_name) @assert_equal.register(BaseCompressedSparseDataset) @assert_equal.register(sparse.spmatrix) -def assert_equal_sparse(a, b, exact=False, elem_name=None): +def assert_equal_sparse( + a: BaseCompressedSparseDataset | sparse.spmatrix, + b: object, + *, + exact: bool = False, + elem_name: str | None = None, +): a = asarray(a) - assert_equal(b, a, exact, elem_name=elem_name) + assert_equal(b, a, exact=exact, elem_name=elem_name) @assert_equal.register(SpArray) -def assert_equal_sparse_array(a, b, exact=False, elem_name=None): - return assert_equal_sparse(a, b, exact, elem_name) +def assert_equal_sparse_array( + a: SpArray, b: object, *, exact: bool = False, elem_name: str | None = None +): + return assert_equal_sparse(a, b, exact=exact, elem_name=elem_name) @assert_equal.register(CupySparseMatrix) -def assert_equal_cupy_sparse(a, b, exact=False, elem_name=None): +def assert_equal_cupy_sparse( + a: CupySparseMatrix, b: object, *, exact: bool = False, elem_name: str | None = None +): a = a.toarray() - assert_equal(b, a, exact, elem_name=elem_name) + assert_equal(b, a, exact=exact, elem_name=elem_name) @assert_equal.register(h5py.Dataset) @assert_equal.register(ZarrArray) -def assert_equal_h5py_dataset(a, b, exact=False, elem_name=None): +def assert_equal_h5py_dataset( + a: ArrayStorageType, b: object, *, exact: bool = False, elem_name: str | None = None +): a = asarray(a) - assert_equal(b, a, exact, elem_name=elem_name) + assert_equal(b, a, exact=exact, elem_name=elem_name) @assert_equal.register(DaskArray) -def assert_equal_dask_array(a, b, exact=False, elem_name=None): - assert_equal(b, a.compute(), exact, elem_name) +def assert_equal_dask_array( + a: DaskArray, b: object, *, exact: bool = False, elem_name: str | None = None +): + assert_equal(b, a.compute(), exact=exact, elem_name=elem_name) @assert_equal.register(pd.DataFrame) -def are_equal_dataframe(a, b, exact=False, elem_name=None): +def are_equal_dataframe( + a: pd.DataFrame, b: object, *, exact: bool = False, elem_name: str | None = None +): if not isinstance(b, pd.DataFrame): - assert_equal(b, a, exact, elem_name) # , a.values maybe? + assert_equal(b, a, exact=exact, elem_name=elem_name) # , a.values maybe? report_name(pd.testing.assert_frame_equal)( a, @@ -644,25 +673,38 @@ def are_equal_dataframe(a, b, exact=False, elem_name=None): @assert_equal.register(AwkArray) -def assert_equal_awkarray(a, b, exact=False, elem_name=None): +def assert_equal_awkarray( + a: AwkArray, b: object, *, exact: bool = False, elem_name: str | None = None +): import awkward as ak if exact: + assert isinstance(b, AwkArray) assert a.type == b.type, f"{a.type} != {b.type}, {format_msg(elem_name)}" assert ak.to_list(a) == ak.to_list(b), format_msg(elem_name) @assert_equal.register(Mapping) -def assert_equal_mapping(a, b, exact=False, elem_name=None): +def assert_equal_mapping( + a: Mapping, b: object, *, exact: bool = False, elem_name: str | None = None +): + assert isinstance(b, Mapping) assert set(a.keys()) == set(b.keys()), format_msg(elem_name) for k in a.keys(): if elem_name is None: elem_name = "" - assert_equal(a[k], b[k], exact, f"{elem_name}/{k}") + assert_equal(a[k], b[k], exact=exact, elem_name=f"{elem_name}/{k}") @assert_equal.register(AlignedMappingBase) -def assert_equal_aligned_mapping(a, b, exact=False, elem_name=None): +def assert_equal_aligned_mapping( + a: AlignedMappingBase, + b: object, + *, + exact: bool = False, + elem_name: str | None = None, +): + assert isinstance(b, AlignedMappingBase) a_indices = (a.parent.obs_names, a.parent.var_names) b_indices = (b.parent.obs_names, b.parent.var_names) for axis_idx in a.axes: @@ -674,17 +716,23 @@ def assert_equal_aligned_mapping(a, b, exact=False, elem_name=None): @assert_equal.register(pd.Index) -def assert_equal_index(a, b, exact=False, elem_name=None): - if not exact: - report_name(pd.testing.assert_index_equal)( - a, b, check_names=False, check_categorical=False, _elem_name=elem_name - ) - else: - report_name(pd.testing.assert_index_equal)(a, b, _elem_name=elem_name) +def assert_equal_index( + a: pd.Index, b: object, *, exact: bool = False, elem_name: str | None = None +): + params = dict(check_categorical=False) if not exact else {} + report_name(pd.testing.assert_index_equal)( + a, b, check_names=False, **params, _elem_name=elem_name + ) @assert_equal.register(pd.api.extensions.ExtensionArray) -def assert_equal_extension_array(a, b, exact=False, elem_name=None): +def assert_equal_extension_array( + a: pd.api.extensions.ExtensionArray, + b: object, + *, + exact: bool = False, + elem_name: str | None = None, +): report_name(pd.testing.assert_extension_array_equal)( a, b, @@ -695,7 +743,9 @@ def assert_equal_extension_array(a, b, exact=False, elem_name=None): @assert_equal.register(Raw) -def assert_equal_raw(a, b, exact=False, elem_name=None): +def assert_equal_raw( + a: Raw, b: object, *, exact: bool = False, elem_name: str | None = None +): def assert_is_not_none(x): # can't put an assert in a lambda assert x is not None @@ -711,7 +761,7 @@ def assert_is_not_none(x): # can't put an assert in a lambda @assert_equal.register(AnnData) def assert_adata_equal( - a: AnnData, b: AnnData, exact: bool = False, elem_name: str | None = None + a: AnnData, b: object, *, exact: bool = False, elem_name: str | None = None ): """\ Check whether two AnnData objects are equivalent, @@ -732,10 +782,12 @@ def fmt_name(x): else: return f"{elem_name}/{x}" + assert isinstance(b, AnnData) + # There may be issues comparing views, since np.allclose # can modify ArrayViews if they contain `nan`s - assert_equal(a.obs_names, b.obs_names, exact, elem_name=fmt_name("obs_names")) - assert_equal(a.var_names, b.var_names, exact, elem_name=fmt_name("var_names")) + assert_equal(a.obs_names, b.obs_names, exact=exact, elem_name=fmt_name("obs_names")) + assert_equal(a.var_names, b.var_names, exact=exact, elem_name=fmt_name("var_names")) if not exact: # Reorder all elements if necessary idx = [slice(None), slice(None)] @@ -764,7 +816,7 @@ def fmt_name(x): assert_equal( getattr(a, attr), getattr(b, attr), - exact, + exact=exact, elem_name=fmt_name(attr), ) diff --git a/src/anndata/utils.py b/src/anndata/utils.py index 60dffa87f..06c15bfb7 100644 --- a/src/anndata/utils.py +++ b/src/anndata/utils.py @@ -356,6 +356,7 @@ def deprecated( new_name: str, category: type[Warning] = DeprecationWarning, add_msg: str = "", + *, hide: bool = True, ): """\ diff --git a/tests/test_backed_sparse.py b/tests/test_backed_sparse.py index 499aeadbd..043103820 100644 --- a/tests/test_backed_sparse.py +++ b/tests/test_backed_sparse.py @@ -636,14 +636,14 @@ def test_backed_sizeof( pytest.param(sparse.csr_array, marks=[*sparray_scipy_bug_marks]), ], ) -def test_append_overflow_check(group_fn, sparse_class, tmpdir): - group = group_fn(tmpdir) +def test_append_overflow_check(group_fn, sparse_class, tmp_path): + group = group_fn(tmp_path) typemax_int32 = np.iinfo(np.int32).max orig_mtx = sparse_class(np.ones((1, 1), dtype=bool)) # Minimally allocating new matrix new_mtx = sparse_class( ( - np.broadcast_to(True, typemax_int32 - 1), + np.broadcast_to(True, typemax_int32 - 1), # noqa: FBT003 np.broadcast_to(np.int32(1), typemax_int32 - 1), [0, typemax_int32 - 1], ), diff --git a/tests/test_concatenate.py b/tests/test_concatenate.py index 3dd5a361e..b43d4666e 100644 --- a/tests/test_concatenate.py +++ b/tests/test_concatenate.py @@ -125,7 +125,9 @@ def merge_strategy(request): return request.param -def fix_known_differences(orig, result, backwards_compat=True): +def fix_known_differences( + orig: AnnData, result: AnnData, *, backwards_compat: bool = True +): """ Helper function for reducing anndata's to only the elements we expect to be equivalent after concatenation. diff --git a/tests/test_concatenate_disk.py b/tests/test_concatenate_disk.py index a05d9a308..bdeb5c6e6 100644 --- a/tests/test_concatenate_disk.py +++ b/tests/test_concatenate_disk.py @@ -113,6 +113,7 @@ def get_array_type(array_type, axis): @pytest.mark.parametrize("reindex", [True, False], ids=["reindex", "no_reindex"]) def test_anndatas( + *, axis: Literal[0, 1], array_type: Literal["array", "sparse", "sparse_array"], join_type: Literal["inner", "outer"], diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py index 0f5bfb883..49c751b4e 100644 --- a/tests/test_io_elementwise.py +++ b/tests/test_io_elementwise.py @@ -214,7 +214,7 @@ def test_io_spec(store, value, encoding_type): [ pytest.param(np.asarray(1), "numeric-scalar", id="scalar_int"), pytest.param(np.asarray(1.0), "numeric-scalar", id="scalar_float"), - pytest.param(np.asarray(True), "numeric-scalar", id="scalar_bool"), + pytest.param(np.asarray(True), "numeric-scalar", id="scalar_bool"), # noqa: FBT003 pytest.param(np.asarray("test"), "string", id="scalar_string"), ], ) diff --git a/tests/test_io_utils.py b/tests/test_io_utils.py index 25c66f46d..b4e72d14a 100644 --- a/tests/test_io_utils.py +++ b/tests/test_io_utils.py @@ -32,7 +32,7 @@ def diskfmt(request): ) @pytest.mark.parametrize("nested", [True, False], ids=["nested", "root"]) def test_key_error( - tmp_path, group_fn: Callable[[Path], zarr.Group | h5py.Group], nested: bool + *, tmp_path, group_fn: Callable[[Path], zarr.Group | h5py.Group], nested: bool ): @report_read_key_on_error def read_attr(_): diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py index 518559995..a43111a06 100644 --- a/tests/test_readwrite.py +++ b/tests/test_readwrite.py @@ -107,11 +107,10 @@ def dtype(request): @pytest.mark.parametrize("typ", [np.array, csr_matrix, csr_array, as_dense_dask_array]) def test_readwrite_roundtrip(typ, tmp_path, diskfmt, diskfmt2): - tmpdir = Path(tmp_path) - pth1 = tmpdir / f"first.{diskfmt}" + pth1 = tmp_path / f"first.{diskfmt}" write1 = lambda x: getattr(x, f"write_{diskfmt}")(pth1) read1 = lambda: getattr(ad, f"read_{diskfmt}")(pth1) - pth2 = tmpdir / f"second.{diskfmt2}" + pth2 = tmp_path / f"second.{diskfmt2}" write2 = lambda x: getattr(x, f"write_{diskfmt2}")(pth2) read2 = lambda: getattr(ad, f"read_{diskfmt2}")(pth2) @@ -474,7 +473,7 @@ def test_readloom_deprecations(tmp_path): # positional -> keyword with pytest.warns(FutureWarning, match=r"sparse"): - depr_result = ad.io.read_loom(loom_pth, True) + depr_result = ad.io.read_loom(loom_pth, True) # noqa: FBT003 actual_result = ad.io.read_loom(loom_pth, sparse=True) assert type(depr_result.X) == type(actual_result.X) diff --git a/tests/test_settings.py b/tests/test_settings.py index ba7dba8f9..3387b0cd8 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -241,7 +241,7 @@ class TestEnum(Enum): ), ], ) -def test_describe(as_rst: bool, expected: str, settings: SettingsManager): +def test_describe(*, as_rst: bool, expected: str, settings: SettingsManager): assert settings.describe("test_var_3", as_rst=as_rst) == expected diff --git a/tests/test_views.py b/tests/test_views.py index fb6794dfd..6c376f0bf 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -631,7 +631,7 @@ def test_invalid_scalar_index(adata, index): @pytest.mark.parametrize("obs", [False, True]) @pytest.mark.parametrize("index", [-100, -50, -1]) -def test_negative_scalar_index(adata, index: int, obs: bool): +def test_negative_scalar_index(*, adata, index: int, obs: bool): pos_index = index + (adata.n_obs if obs else adata.n_vars) if obs: