From 7aae234c9196f01b54fae4faa6547bcb0bd76893 Mon Sep 17 00:00:00 2001 From: Adam Gayoso Date: Wed, 21 Aug 2024 22:48:32 +0100 Subject: [PATCH 1/9] Add seed to leiden clustering --- src/scib_metrics/metrics/_nmi_ari.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/scib_metrics/metrics/_nmi_ari.py b/src/scib_metrics/metrics/_nmi_ari.py index 215c27c..903f0dd 100644 --- a/src/scib_metrics/metrics/_nmi_ari.py +++ b/src/scib_metrics/metrics/_nmi_ari.py @@ -1,4 +1,5 @@ import logging +import random import warnings import igraph @@ -19,7 +20,9 @@ def _compute_clustering_kmeans(X: np.ndarray, n_clusters: int) -> np.ndarray: return kmeans.labels_ -def _compute_clustering_leiden(connectivity_graph: spmatrix, resolution: float) -> np.ndarray: +def _compute_clustering_leiden(connectivity_graph: spmatrix, resolution: float, seed: int) -> np.ndarray: + rng = random.Random(seed) + ig.set_random_number_generator(rng) # The connectivity graph with the umap method is symmetric, but we need to first make it directed # to have both sets of edges as is done in scanpy. See test for more details. g = igraph.Graph.Weighted_Adjacency(connectivity_graph, mode="directed") @@ -33,8 +36,9 @@ def _compute_nmi_ari_cluster_labels( X: spmatrix, labels: np.ndarray, resolution: float = 1.0, + seed: int = 42, ) -> tuple[float, float]: - labels_pred = _compute_clustering_leiden(X, resolution) + labels_pred = _compute_clustering_leiden(X, resolution, seed) nmi = normalized_mutual_info_score(labels, labels_pred, average_method="arithmetic") ari = adjusted_rand_score(labels, labels_pred) return nmi, ari @@ -71,7 +75,7 @@ def nmi_ari_cluster_labels_kmeans(X: np.ndarray, labels: np.ndarray) -> dict[str def nmi_ari_cluster_labels_leiden( - X: NeighborsResults, labels: np.ndarray, optimize_resolution: bool = True, resolution: float = 1.0, n_jobs: int = 1 + X: NeighborsResults, labels: np.ndarray, optimize_resolution: bool = True, resolution: float = 1.0, n_jobs: int = 1, seed: int = 42 ) -> dict[str, float]: """Compute nmi and ari between leiden clusters and labels. @@ -93,6 +97,8 @@ def nmi_ari_cluster_labels_leiden( n_jobs Number of jobs for parallelizing resolution optimization via joblib. If -1, all CPUs are used. + seed + Seed used for reproducibility of clustering. Returns ------- @@ -113,7 +119,7 @@ def nmi_ari_cluster_labels_leiden( ) except ImportError: warnings.warn("Using for loop over clustering resolutions. `pip install joblib` for parallelization.") - out = [_compute_nmi_ari_cluster_labels(conn_graph, labels, r) for r in resolutions] + out = [_compute_nmi_ari_cluster_labels(conn_graph, labels, r, seed=seed) for r in resolutions] nmi_ari = np.array(out) nmi_ind = np.argmax(nmi_ari[:, 0]) nmi, ari = nmi_ari[nmi_ind, :] From 76b441a183830f40c4824a7815ddb7366d923bfe Mon Sep 17 00:00:00 2001 From: Adam Gayoso Date: Wed, 21 Aug 2024 22:49:09 +0100 Subject: [PATCH 2/9] Update _nmi_ari.py --- src/scib_metrics/metrics/_nmi_ari.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scib_metrics/metrics/_nmi_ari.py b/src/scib_metrics/metrics/_nmi_ari.py index 903f0dd..7c8b961 100644 --- a/src/scib_metrics/metrics/_nmi_ari.py +++ b/src/scib_metrics/metrics/_nmi_ari.py @@ -22,7 +22,7 @@ def _compute_clustering_kmeans(X: np.ndarray, n_clusters: int) -> np.ndarray: def _compute_clustering_leiden(connectivity_graph: spmatrix, resolution: float, seed: int) -> np.ndarray: rng = random.Random(seed) - ig.set_random_number_generator(rng) + igraph.set_random_number_generator(rng) # The connectivity graph with the umap method is symmetric, but we need to first make it directed # to have both sets of edges as is done in scanpy. See test for more details. g = igraph.Graph.Weighted_Adjacency(connectivity_graph, mode="directed") From d23760c309b414c40ff800e2b94cdeb52865df1e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 21 Aug 2024 21:49:59 +0000 Subject: [PATCH 3/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/scib_metrics/metrics/_nmi_ari.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/scib_metrics/metrics/_nmi_ari.py b/src/scib_metrics/metrics/_nmi_ari.py index 7c8b961..9fd18ac 100644 --- a/src/scib_metrics/metrics/_nmi_ari.py +++ b/src/scib_metrics/metrics/_nmi_ari.py @@ -75,7 +75,12 @@ def nmi_ari_cluster_labels_kmeans(X: np.ndarray, labels: np.ndarray) -> dict[str def nmi_ari_cluster_labels_leiden( - X: NeighborsResults, labels: np.ndarray, optimize_resolution: bool = True, resolution: float = 1.0, n_jobs: int = 1, seed: int = 42 + X: NeighborsResults, + labels: np.ndarray, + optimize_resolution: bool = True, + resolution: float = 1.0, + n_jobs: int = 1, + seed: int = 42, ) -> dict[str, float]: """Compute nmi and ari between leiden clusters and labels. From 292baed81ef571dff99ceaaabe902becd9d5c832 Mon Sep 17 00:00:00 2001 From: Adam Gayoso Date: Wed, 21 Aug 2024 22:53:21 +0100 Subject: [PATCH 4/9] Update test_metrics.py --- tests/test_metrics.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_metrics.py b/tests/test_metrics.py index c55e079..34d2dd0 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -110,6 +110,14 @@ def test_nmi_ari_cluster_labels_leiden_single_resolution(): assert isinstance(nmi, float) assert isinstance(ari, float) +def test_nmi_ari_cluster_labels_leiden_reproducibility(): + X, labels = dummy_x_labels(symmetric_positive=True, x_is_neighbors_results=True) + out1 = scib_metrics.nmi_ari_cluster_labels_leiden(X, labels, optimize_resolution=False, resolution=3.0) + out2 = scib_metrics.nmi_ari_cluster_labels_leiden(X, labels, optimize_resolution=False, resolution=3.0) + nmi1, ari1 = out1["nmi"], out1["ari"] + nmi2, ari2 = out2["nmi"], out2["ari"] + assert nmi1 == nmi2 + assert ari1 == ari2 def test_leiden_graph_construction(): X, _ = dummy_x_labels(symmetric_positive=True, x_is_neighbors_results=True) From dbad5d6b5f24889bf4b937fbbded69771f6349d6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 21 Aug 2024 21:53:28 +0000 Subject: [PATCH 5/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_metrics.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 34d2dd0..c8f5cdf 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -110,6 +110,7 @@ def test_nmi_ari_cluster_labels_leiden_single_resolution(): assert isinstance(nmi, float) assert isinstance(ari, float) + def test_nmi_ari_cluster_labels_leiden_reproducibility(): X, labels = dummy_x_labels(symmetric_positive=True, x_is_neighbors_results=True) out1 = scib_metrics.nmi_ari_cluster_labels_leiden(X, labels, optimize_resolution=False, resolution=3.0) @@ -119,6 +120,7 @@ def test_nmi_ari_cluster_labels_leiden_reproducibility(): assert nmi1 == nmi2 assert ari1 == ari2 + def test_leiden_graph_construction(): X, _ = dummy_x_labels(symmetric_positive=True, x_is_neighbors_results=True) conn_graph = X.knn_graph_connectivities From 009bf223bd0a0eda90516af888dab6d1961f71d4 Mon Sep 17 00:00:00 2001 From: Adam Gayoso Date: Thu, 22 Aug 2024 07:59:08 +0100 Subject: [PATCH 6/9] Update test_metrics.py --- tests/test_metrics.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_metrics.py b/tests/test_metrics.py index c8f5cdf..ff6b7ea 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -120,6 +120,13 @@ def test_nmi_ari_cluster_labels_leiden_reproducibility(): assert nmi1 == nmi2 assert ari1 == ari2 + out1 = scib_metrics.nmi_ari_cluster_labels_leiden(X, labels, optimize_resolution=False, resolution=3.0, seed=1) + out2 = scib_metrics.nmi_ari_cluster_labels_leiden(X, labels, optimize_resolution=False, resolution=3.0, seed=100) + nmi1, ari1 = out1["nmi"], out1["ari"] + nmi2, ari2 = out2["nmi"], out2["ari"] + assert nmi1 != nmi2 + assert ari1 != ari2 + def test_leiden_graph_construction(): X, _ = dummy_x_labels(symmetric_positive=True, x_is_neighbors_results=True) From 4c7fe7be8de25f645dfdf91d3fef12e1f0d28949 Mon Sep 17 00:00:00 2001 From: Adam Gayoso Date: Thu, 22 Aug 2024 08:06:27 +0100 Subject: [PATCH 7/9] Update test_metrics.py --- tests/test_metrics.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/test_metrics.py b/tests/test_metrics.py index ff6b7ea..c8f5cdf 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -120,13 +120,6 @@ def test_nmi_ari_cluster_labels_leiden_reproducibility(): assert nmi1 == nmi2 assert ari1 == ari2 - out1 = scib_metrics.nmi_ari_cluster_labels_leiden(X, labels, optimize_resolution=False, resolution=3.0, seed=1) - out2 = scib_metrics.nmi_ari_cluster_labels_leiden(X, labels, optimize_resolution=False, resolution=3.0, seed=100) - nmi1, ari1 = out1["nmi"], out1["ari"] - nmi2, ari2 = out2["nmi"], out2["ari"] - assert nmi1 != nmi2 - assert ari1 != ari2 - def test_leiden_graph_construction(): X, _ = dummy_x_labels(symmetric_positive=True, x_is_neighbors_results=True) From 8b70724f836467e7482e6ff53ebc5aae9ae45b04 Mon Sep 17 00:00:00 2001 From: Adam Gayoso Date: Thu, 22 Aug 2024 08:07:57 +0100 Subject: [PATCH 8/9] Update CHANGELOG.md --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d25180..e5aa17b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,10 @@ and this project adheres to [Semantic Versioning][]. - Add `progress_bar` argument to {class}`scib_metrics.benchmark.Benchmarker` {pr}`152`. +### Changed + +- Leiden clustering now has a seed argument for reproducibility {pr}`173`. + ### Fixed - Fix neighbors connectivities in test to use new scanpy fn {pr}`170`. From d48f3730d40149b275a9af78401f3cbf14952f90 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 22 Aug 2024 07:09:27 +0000 Subject: [PATCH 9/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e5aa17b..8e377d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,7 @@ and this project adheres to [Semantic Versioning][]. ### Changed -- Leiden clustering now has a seed argument for reproducibility {pr}`173`. +- Leiden clustering now has a seed argument for reproducibility {pr}`173`. ### Fixed