From 0d0546fd2ec652045edb5517231ba5ebd14f0240 Mon Sep 17 00:00:00 2001 From: Intron7 Date: Fri, 12 Jul 2024 13:24:53 +0200 Subject: [PATCH 1/3] update dataloading --- decoupler/pre.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/decoupler/pre.py b/decoupler/pre.py index 268ad86..f6c54bf 100644 --- a/decoupler/pre.py +++ b/decoupler/pre.py @@ -18,7 +18,7 @@ def check_mat(m, r, c, verbose=False): # Check for empty features if type(m) is csr_matrix: - msk_features = np.sum(m != 0, axis=0).A1 == 0 + msk_features = m.getnnz(axis=0) == 0 else: msk_features = np.count_nonzero(m, axis=0) == 0 n_empty_features = np.sum(msk_features) @@ -29,8 +29,8 @@ def check_mat(m, r, c, verbose=False): m = m[:, ~msk_features] # Sort features - msk = np.argsort(c) - m, r, c = m[:, msk], r.astype('U'), c[msk].astype('U') + #msk = np.argsort(c) + #m, r, c = m[:, msk], r.astype('U'), c[msk].astype('U') # Check for repeated features if np.any(c[1:] == c[:-1]): @@ -38,7 +38,7 @@ def check_mat(m, r, c, verbose=False): # Check for empty samples if type(m) is csr_matrix: - msk_samples = np.sum(m != 0, axis=1).A1 == 0 + msk_samples = m.getnnz(axis=1) == 0 else: msk_samples = np.count_nonzero(m, axis=1) == 0 n_empty_samples = np.sum(msk_samples) @@ -174,9 +174,12 @@ def match(c, r, net): # Init empty regX regX = np.zeros((c.shape[0], net.shape[1]), dtype=np.float32) - # Match genes from mat, else are 0s - idxs = np.searchsorted(c, r) - regX[idxs] = net + # Create an index array for rows of c corresponding to r + c_dict = {gene: i for i, gene in enumerate(c)} + idxs = [c_dict[gene] for gene in r if gene in c_dict] + + # Populate regX using advanced indexing + regX[idxs, :] = net[: len(idxs), :] return regX From 31d9925ced493dcd47936cdaebe9aa6c466a87c7 Mon Sep 17 00:00:00 2001 From: Intron7 Date: Fri, 12 Jul 2024 13:25:00 +0200 Subject: [PATCH 2/3] use nice tqdm --- decoupler/method_aucell.py | 2 +- decoupler/method_gsea.py | 2 +- decoupler/method_gsva.py | 2 +- decoupler/method_mdt.py | 2 +- decoupler/method_mlm.py | 2 +- decoupler/method_ora.py | 2 +- decoupler/method_udt.py | 3 +-- decoupler/method_ulm.py | 2 +- decoupler/method_viper.py | 3 ++- decoupler/method_wmean.py | 2 +- decoupler/method_wsum.py | 2 +- 11 files changed, 12 insertions(+), 12 deletions(-) diff --git a/decoupler/method_aucell.py b/decoupler/method_aucell.py index 0b88357..2e3587c 100644 --- a/decoupler/method_aucell.py +++ b/decoupler/method_aucell.py @@ -8,7 +8,7 @@ from scipy.sparse import csr_matrix from numpy.random import default_rng -from tqdm import tqdm +from tqdm.auto import tqdm from .pre import extract, rename_net, filt_min_n, return_data diff --git a/decoupler/method_gsea.py b/decoupler/method_gsea.py index ba4c8ff..320b33c 100644 --- a/decoupler/method_gsea.py +++ b/decoupler/method_gsea.py @@ -12,7 +12,7 @@ from .pre import extract, rename_net, filt_min_n, return_data from .utils import p_adjust_fdr -from tqdm import tqdm +from tqdm.auto import tqdm import numba as nb diff --git a/decoupler/method_gsva.py b/decoupler/method_gsva.py index debc935..42a6bea 100644 --- a/decoupler/method_gsva.py +++ b/decoupler/method_gsva.py @@ -13,7 +13,7 @@ from .pre import extract, rename_net, filt_min_n, return_data from .method_gsea import std -from tqdm import tqdm +from tqdm.auto import tqdm import numba as nb diff --git a/decoupler/method_mdt.py b/decoupler/method_mdt.py index 1ec1f9a..f24ba44 100644 --- a/decoupler/method_mdt.py +++ b/decoupler/method_mdt.py @@ -9,7 +9,7 @@ from .pre import extract, match, rename_net, get_net_mat, filt_min_n, return_data -from tqdm import tqdm +from tqdm.auto import tqdm def check_if_skranger(): diff --git a/decoupler/method_mlm.py b/decoupler/method_mlm.py index 9666cf2..790ca03 100644 --- a/decoupler/method_mlm.py +++ b/decoupler/method_mlm.py @@ -11,7 +11,7 @@ from scipy import stats -from tqdm import tqdm +from tqdm.auto import tqdm import numba as nb diff --git a/decoupler/method_ora.py b/decoupler/method_ora.py index 8eafb9a..2445c0e 100644 --- a/decoupler/method_ora.py +++ b/decoupler/method_ora.py @@ -15,7 +15,7 @@ from .pre import extract, rename_net, filt_min_n, return_data from .utils import p_adjust_fdr -from tqdm import tqdm +from tqdm.auto import tqdm import numba as nb diff --git a/decoupler/method_udt.py b/decoupler/method_udt.py index 09f6496..122bf4a 100644 --- a/decoupler/method_udt.py +++ b/decoupler/method_udt.py @@ -9,8 +9,7 @@ from .pre import extract, match, rename_net, get_net_mat, filt_min_n, return_data -from tqdm import tqdm - +from tqdm.auto import tqdm def check_if_sklearn(): try: diff --git a/decoupler/method_ulm.py b/decoupler/method_ulm.py index 3a57c1e..9820494 100644 --- a/decoupler/method_ulm.py +++ b/decoupler/method_ulm.py @@ -11,7 +11,7 @@ from .pre import extract, match, rename_net, get_net_mat, filt_min_n, return_data -from tqdm import tqdm +from tqdm.auto import tqdm def mat_cov(A, b): diff --git a/decoupler/method_viper.py b/decoupler/method_viper.py index 345853b..e9ffb46 100644 --- a/decoupler/method_viper.py +++ b/decoupler/method_viper.py @@ -12,7 +12,8 @@ from .pre import extract, match, rename_net, get_net_mat, filt_min_n, return_data -from tqdm import tqdm +from tqdm.auto import tqdm + import numba as nb diff --git a/decoupler/method_wmean.py b/decoupler/method_wmean.py index 0d9f9c8..b13e14e 100644 --- a/decoupler/method_wmean.py +++ b/decoupler/method_wmean.py @@ -10,7 +10,7 @@ from .pre import extract, match, rename_net, get_net_mat, filt_min_n, return_data from .method_gsea import std -from tqdm import tqdm +from tqdm.auto import tqdm import numba as nb diff --git a/decoupler/method_wsum.py b/decoupler/method_wsum.py index 45a390c..a00d87a 100644 --- a/decoupler/method_wsum.py +++ b/decoupler/method_wsum.py @@ -10,7 +10,7 @@ from .pre import extract, match, rename_net, get_net_mat, filt_min_n, return_data from .method_gsea import std -from tqdm import tqdm +from tqdm.auto import tqdm import numba as nb From 4ebdc644a62c5fe637bb3813fddc41169fad519c Mon Sep 17 00:00:00 2001 From: Pau Badia i Mompel <44896790+PauBadiaM@users.noreply.github.com> Date: Tue, 23 Jul 2024 15:04:12 +0200 Subject: [PATCH 3/3] Update utils_benchmark.py convert grts to numpy array --- decoupler/utils_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decoupler/utils_benchmark.py b/decoupler/utils_benchmark.py index 65c6f38..93d34a6 100644 --- a/decoupler/utils_benchmark.py +++ b/decoupler/utils_benchmark.py @@ -332,7 +332,7 @@ def format_acts_grts(res, obs, groupby, use_pval): grts = build_grts_mat(obs, exps, srcs) # Match to same srcs between acts and grts - grts = match(srcs, grts.columns, grts.T).T + grts = match(srcs, grts.columns, grts.T.values).T # Build msks tensor msks, grpbys, grps = build_msks_tensor(obs, groupby)