diff --git a/.github/workflows/test_linux.yml b/.github/workflows/test_linux.yml index bd48c9dd75..ffe9c7c668 100644 --- a/.github/workflows/test_linux.yml +++ b/.github/workflows/test_linux.yml @@ -53,7 +53,7 @@ jobs: DISPLAY: :42 COLUMNS: 120 run: | - coverage run -m pytest -v --color=yes + coverage run -m pytest -v --color=yes -m "not custom_dataloader" coverage report - uses: codecov/codecov-action@v4 diff --git a/.github/workflows/test_linux_custom_dataloader.yml b/.github/workflows/test_linux_custom_dataloader.yml new file mode 100644 index 0000000000..e388fd994f --- /dev/null +++ b/.github/workflows/test_linux_custom_dataloader.yml @@ -0,0 +1,89 @@ +name: test (custom dataloaders) + +on: + push: + branches: [main, "[0-9]+.[0-9]+.x"] + pull_request: + branches: [main, "[0-9]+.[0-9]+.x"] + types: [labeled, synchronize, opened] + schedule: + - cron: "0 10 * * *" # runs at 10:00 UTC (03:00 PST) every day + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + # if PR has label "custom_dataloader" or "all tests" or if scheduled or manually triggered + if: >- + ( + contains(github.event.pull_request.labels.*.name, 'custom_dataloader') || + contains(github.event.pull_request.labels.*.name, 'all tests') || + contains(github.event_name, 'schedule') || + contains(github.event_name, 'workflow_dispatch') + ) + + runs-on: ${{ matrix.os }} + + defaults: + run: + shell: bash -e {0} # -e to fail on error + + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + python: ["3.11"] + + name: integration + + env: + OS: ${{ matrix.os }} + PYTHON: ${{ matrix.python }} + + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + cache: "pip" + cache-dependency-path: "**/pyproject.toml" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip wheel uv + python -m uv pip install --system "scvi-tools[tests] @ ." + python -m pip install scdataloader + python -m pip install cellxgene-census + python -m pip install tiledbsoma + python -m pip install s3fs + python -m pip install torchdata==0.9.0 + python -m pip install psutil + python -m pip install lamindb + python -m pip install bionty==0.51.0 + python -m pip install biomart + + - name: Install Specific Branch of Repository + env: + GH_TOKEN: ${{ secrets.GH_TOKEN }} + run: | + git config --global url."https://${GH_TOKEN}:x-oauth-basic@github.com/".insteadOf "https://github.com/" + git clone --single-branch --branch ebezzi/census-scvi-datamodule https://github.com/ori-kron-wis/cellxgene-census.git + git clone --single-branch --branch main https://github.com/jkobject/scDataLoader.git + + - name: Run specific custom dataloader pytest + env: + MPLBACKEND: agg + PLATFORM: ${{ matrix.os }} + DISPLAY: :42 + COLUMNS: 120 + run: | + coverage run -m pytest tests/dataloaders/test_custom_dataloader.py -v --color=yes --custom-dataloader-tests + coverage report + + - uses: codecov/codecov-action@v4 + with: + token: ${{ secrets.CODECOV_TOKEN }} diff --git a/cellxgene-census b/cellxgene-census new file mode 160000 index 0000000000..0797d55a60 --- /dev/null +++ b/cellxgene-census @@ -0,0 +1 @@ +Subproject commit 0797d55a60b82075d8c017af82eccf2845424fdf diff --git a/docs/tutorials/notebooks b/docs/tutorials/notebooks index b5890651da..d8479ad370 160000 --- a/docs/tutorials/notebooks +++ b/docs/tutorials/notebooks @@ -1 +1 @@ -Subproject commit b5890651da3ad734cc12e7d54b39395aa6e9137d +Subproject commit d8479ad3709471940e2a8adaf7c917949ccca982 diff --git a/pyproject.toml b/pyproject.toml index 5fa13f97c3..6ac7bd1bbd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,6 +112,9 @@ tutorials = [ "scvi-tools[optional]", "squidpy>=1.6.0", ] +dataloaders = [ + "scdataloader" +] all = ["scvi-tools[dev,docs,tutorials]"] diff --git a/src/scvi/data/_utils.py b/src/scvi/data/_utils.py index b476618196..31753f03d2 100644 --- a/src/scvi/data/_utils.py +++ b/src/scvi/data/_utils.py @@ -16,6 +16,7 @@ from torch import as_tensor, sparse_csc_tensor, sparse_csr_tensor from scvi import REGISTRY_KEYS, settings +from scvi.utils import attrdict from . import _constants @@ -150,6 +151,14 @@ def _set_data_in_registry( setattr(adata, attr_name, attribute) +def _get_summary_stats_from_registry(registry: dict) -> attrdict: + summary_stats = {} + for field_registry in registry[_constants._FIELD_REGISTRIES_KEY].values(): + field_summary_stats = field_registry[_constants._SUMMARY_STATS_KEY] + summary_stats.update(field_summary_stats) + return attrdict(summary_stats) + + def _verify_and_correct_data_format(adata: AnnData, attr_name: str, attr_key: str | None): """Check data format and correct if necessary. diff --git a/src/scvi/dataloaders/_data_splitting.py b/src/scvi/dataloaders/_data_splitting.py index 9ea0146acb..4b4b14da63 100644 --- a/src/scvi/dataloaders/_data_splitting.py +++ b/src/scvi/dataloaders/_data_splitting.py @@ -386,7 +386,8 @@ class is :class:`~scvi.dataloaders.SemiSupervisedDataLoader`, def __init__( self, - adata_manager: AnnDataManager, + adata_manager: AnnDataManager | None = None, + datamodule: pl.LightningDataModule | None = None, train_size: float | None = None, validation_size: float | None = None, shuffle_set_split: bool = True, diff --git a/src/scvi/external/stereoscope/_model.py b/src/scvi/external/stereoscope/_model.py index 05e1ad0bf7..99e6639f1b 100644 --- a/src/scvi/external/stereoscope/_model.py +++ b/src/scvi/external/stereoscope/_model.py @@ -53,7 +53,8 @@ class RNAStereoscope(UnsupervisedTrainingMixin, BaseModelClass): def __init__( self, - sc_adata: AnnData, + sc_adata: AnnData | None = None, + registry: dict | None = None, **model_kwargs, ): super().__init__(sc_adata) diff --git a/src/scvi/external/stereoscope/_module.py b/src/scvi/external/stereoscope/_module.py index eefb2eb139..f74977d3ec 100644 --- a/src/scvi/external/stereoscope/_module.py +++ b/src/scvi/external/stereoscope/_module.py @@ -140,6 +140,7 @@ def __init__( n_spots: int, sc_params: tuple[np.ndarray], prior_weight: Literal["n_obs", "minibatch"] = "n_obs", + **model_kwargs, ): super().__init__() # unpack and copy parameters diff --git a/src/scvi/model/_amortizedlda.py b/src/scvi/model/_amortizedlda.py index 1817d96fc0..b2f7030914 100644 --- a/src/scvi/model/_amortizedlda.py +++ b/src/scvi/model/_amortizedlda.py @@ -61,7 +61,8 @@ class AmortizedLDA(PyroSviTrainMixin, BaseModelClass): def __init__( self, - adata: AnnData, + adata: AnnData | None = None, + registry: dict | None = None, n_topics: int = 20, n_hidden: int = 128, cell_topic_prior: float | Sequence[float] | None = None, diff --git a/src/scvi/model/_autozi.py b/src/scvi/model/_autozi.py index 08b4e35131..b5cb50e6b3 100644 --- a/src/scvi/model/_autozi.py +++ b/src/scvi/model/_autozi.py @@ -104,7 +104,8 @@ class AUTOZI(VAEMixin, UnsupervisedTrainingMixin, BaseModelClass): def __init__( self, - adata: AnnData, + adata: AnnData | None = None, + registry: dict | None = None, n_hidden: int = 128, n_latent: int = 10, n_layers: int = 1, diff --git a/src/scvi/model/_condscvi.py b/src/scvi/model/_condscvi.py index b5e49a711d..38730fb735 100644 --- a/src/scvi/model/_condscvi.py +++ b/src/scvi/model/_condscvi.py @@ -67,7 +67,8 @@ class CondSCVI(RNASeqMixin, VAEMixin, UnsupervisedTrainingMixin, BaseModelClass) def __init__( self, - adata: AnnData, + adata: AnnData | None = None, + registry: dict | None = None, n_hidden: int = 128, n_latent: int = 5, n_layers: int = 2, diff --git a/src/scvi/model/_jaxscvi.py b/src/scvi/model/_jaxscvi.py index 2a212aad53..e82847c6f5 100644 --- a/src/scvi/model/_jaxscvi.py +++ b/src/scvi/model/_jaxscvi.py @@ -59,7 +59,8 @@ class JaxSCVI(JaxTrainingMixin, BaseModelClass): def __init__( self, - adata: AnnData, + adata: AnnData | None = None, + registry: dict | None = None, n_hidden: int = 128, n_latent: int = 10, dropout_rate: float = 0.1, diff --git a/src/scvi/model/_linear_scvi.py b/src/scvi/model/_linear_scvi.py index 0cbefc3968..cb09e3ce80 100644 --- a/src/scvi/model/_linear_scvi.py +++ b/src/scvi/model/_linear_scvi.py @@ -78,7 +78,8 @@ class LinearSCVI(RNASeqMixin, VAEMixin, UnsupervisedTrainingMixin, BaseModelClas def __init__( self, - adata: AnnData, + adata: AnnData | None = None, + registry: dict | None = None, n_hidden: int = 128, n_latent: int = 10, n_layers: int = 1, diff --git a/src/scvi/model/_multivi.py b/src/scvi/model/_multivi.py index d9c9871169..5960193c10 100644 --- a/src/scvi/model/_multivi.py +++ b/src/scvi/model/_multivi.py @@ -145,6 +145,7 @@ class MULTIVI(VAEMixin, UnsupervisedTrainingMixin, BaseModelClass, ArchesMixin): def __init__( self, adata: AnnOrMuData, + registry: dict | None = None, n_genes: int | None = None, n_regions: int | None = None, modality_weights: Literal["equal", "cell", "universal"] = "equal", diff --git a/src/scvi/model/_peakvi.py b/src/scvi/model/_peakvi.py index 1c5e011bba..54ee253804 100644 --- a/src/scvi/model/_peakvi.py +++ b/src/scvi/model/_peakvi.py @@ -92,7 +92,8 @@ class PEAKVI(ArchesMixin, VAEMixin, UnsupervisedTrainingMixin, BaseModelClass): def __init__( self, - adata: AnnData, + adata: AnnData | None = None, + registry: dict | None = None, n_hidden: int | None = None, n_latent: int | None = None, n_layers_encoder: int = 2, diff --git a/src/scvi/model/_scanvi.py b/src/scvi/model/_scanvi.py index 87a14dbd54..59f8034ac3 100644 --- a/src/scvi/model/_scanvi.py +++ b/src/scvi/model/_scanvi.py @@ -8,7 +8,9 @@ import numpy as np import pandas as pd import torch +from anndata import AnnData +import scvi from scvi import REGISTRY_KEYS, settings from scvi.data import AnnDataManager from scvi.data._constants import ( @@ -39,9 +41,14 @@ from typing import Literal from anndata import AnnData + from lightning import LightningDataModule from ._scvi import SCVI +_SCANVI_LATENT_QZM = "_scanvi_latent_qzm" +_SCANVI_LATENT_QZV = "_scanvi_latent_qzv" +_SCANVI_OBSERVED_LIB_SIZE = "_scanvi_observed_lib_size" + logger = logging.getLogger(__name__) @@ -75,6 +82,9 @@ class SCANVI(RNASeqMixin, VAEMixin, ArchesMixin, BaseMinifiedModeModelClass): * ``'nb'`` - Negative binomial distribution * ``'zinb'`` - Zero-inflated negative binomial distribution * ``'poisson'`` - Poisson distribution + use_observed_lib_size + If ``True``, use the observed library size for RNA as the scaling factor in the mean of the + conditional distribution. linear_classifier If ``True``, uses a single linear layer for classification instead of a multi-layer perceptron. @@ -106,35 +116,45 @@ class SCANVI(RNASeqMixin, VAEMixin, ArchesMixin, BaseMinifiedModeModelClass): def __init__( self, - adata: AnnData, + adata: AnnData | None, + registry: dict | None = None, n_hidden: int = 128, n_latent: int = 10, n_layers: int = 1, dropout_rate: float = 0.1, dispersion: Literal["gene", "gene-batch", "gene-label", "gene-cell"] = "gene", gene_likelihood: Literal["zinb", "nb", "poisson"] = "zinb", + use_observed_lib_size: bool = True, linear_classifier: bool = False, + datamodule: LightningDataModule | None = None, **model_kwargs, ): - super().__init__(adata) + super().__init__(adata, registry) scanvae_model_kwargs = dict(model_kwargs) - self._set_indices_and_labels() + self._set_indices_and_labels(datamodule) # ignores unlabeled catgegory n_labels = self.summary_stats.n_labels - 1 - n_cats_per_cov = ( - self.adata_manager.get_state_registry(REGISTRY_KEYS.CAT_COVS_KEY).n_cats_per_key - if REGISTRY_KEYS.CAT_COVS_KEY in self.adata_manager.data_registry - else None - ) + if adata is not None: + n_cats_per_cov = ( + self.adata_manager.get_state_registry(REGISTRY_KEYS.CAT_COVS_KEY).n_cats_per_key + if REGISTRY_KEYS.CAT_COVS_KEY in self.adata_manager.data_registry + else None + ) + else: + # custom datamodule + n_cats_per_cov = self.summary_stats[f"n_{REGISTRY_KEYS.CAT_COVS_KEY}"] + if n_cats_per_cov == 0: + n_cats_per_cov = None n_batch = self.summary_stats.n_batch - use_size_factor_key = REGISTRY_KEYS.SIZE_FACTOR_KEY in self.adata_manager.data_registry + use_size_factor_key = self.registry_["setup_args"][f"{REGISTRY_KEYS.SIZE_FACTOR_KEY}_key"] library_log_means, library_log_vars = None, None if ( not use_size_factor_key and self.minified_data_type != ADATA_MINIFY_TYPE.LATENT_POSTERIOR + and not use_observed_lib_size ): library_log_means, library_log_vars = _init_library_size(self.adata_manager, n_batch) @@ -151,6 +171,7 @@ def __init__( dispersion=dispersion, gene_likelihood=gene_likelihood, use_size_factor_key=use_size_factor_key, + use_observed_lib_size=use_observed_lib_size, library_log_means=library_log_means, library_log_vars=library_log_vars, linear_classifier=linear_classifier, @@ -178,6 +199,7 @@ def from_scvi_model( unlabeled_category: str, labels_key: str | None = None, adata: AnnData | None = None, + registry: dict | None = None, **scanvi_kwargs, ): """Initialize scanVI model with weights from pretrained :class:`~scvi.model.SCVI` model. @@ -194,6 +216,8 @@ def from_scvi_model( Value used for unlabeled cells in `labels_key` used to setup AnnData with scvi. adata AnnData object that has been registered via :meth:`~scvi.model.SCANVI.setup_anndata`. + registry + Registry of the datamodule used to train scANVI model. scanvi_kwargs kwargs for scANVI model """ @@ -223,13 +247,15 @@ def from_scvi_model( if adata is None: adata = scvi_model.adata - else: + elif adata: if _is_minified(adata): raise ValueError("Please provide a non-minified `adata` to initialize scANVI.") # validate new anndata against old model scvi_model._validate_anndata(adata) + else: + adata = None - scvi_setup_args = deepcopy(scvi_model.adata_manager.registry[_SETUP_ARGS_KEY]) + scvi_setup_args = deepcopy(scvi_model.registry[_SETUP_ARGS_KEY]) scvi_labels_key = scvi_setup_args["labels_key"] if labels_key is None and scvi_labels_key is None: raise ValueError( @@ -237,35 +263,40 @@ def from_scvi_model( ) if scvi_labels_key is None: scvi_setup_args.update({"labels_key": labels_key}) - cls.setup_anndata( - adata, - unlabeled_category=unlabeled_category, - use_minified=False, - **scvi_setup_args, - ) - scanvi_model = cls(adata, **non_kwargs, **kwargs, **scanvi_kwargs) + if adata is not None: + cls.setup_anndata( + adata, + unlabeled_category=unlabeled_category, + use_minified=False, + **scvi_setup_args, + ) + + scanvi_model = cls(adata, scvi_model.registry, **non_kwargs, **kwargs, **scanvi_kwargs) scvi_state_dict = scvi_model.module.state_dict() scanvi_model.module.load_state_dict(scvi_state_dict, strict=False) scanvi_model.was_pretrained = True return scanvi_model - def _set_indices_and_labels(self): + def _set_indices_and_labels(self, datamodule=None): """Set indices for labeled and unlabeled cells.""" - labels_state_registry = self.adata_manager.get_state_registry(REGISTRY_KEYS.LABELS_KEY) + labels_state_registry = self.get_state_registry(REGISTRY_KEYS.LABELS_KEY) self.original_label_key = labels_state_registry.original_key self.unlabeled_category_ = labels_state_registry.unlabeled_category - labels = get_anndata_attribute( - self.adata, - self.adata_manager.data_registry.labels.attr_name, - self.original_label_key, - ).ravel() + if datamodule is None: + self.labels_ = get_anndata_attribute( + self.adata, + self.adata_manager.data_registry.labels.attr_name, + self.original_label_key, + ).ravel() + else: + self.labels_ = datamodule.labels.ravel() self._label_mapping = labels_state_registry.categorical_mapping # set unlabeled and labeled indices - self._unlabeled_indices = np.argwhere(labels == self.unlabeled_category_).ravel() - self._labeled_indices = np.argwhere(labels != self.unlabeled_category_).ravel() + self._unlabeled_indices = np.argwhere(self.labels_ == self.unlabeled_category_).ravel() + self._labeled_indices = np.argwhere(self.labels_ != self.unlabeled_category_).ravel() self._code_to_label = dict(enumerate(self._label_mapping)) def predict( @@ -357,6 +388,7 @@ def train( devices: int | list[int] | str = "auto", datasplitter_kwargs: dict | None = None, plan_kwargs: dict | None = None, + datamodule: LightningDataModule | None = None, **trainer_kwargs, ): """Train the model. @@ -391,6 +423,10 @@ def train( plan_kwargs Keyword args for :class:`~scvi.train.SemiSupervisedTrainingPlan`. Keyword arguments passed to `train()` will overwrite values present in `plan_kwargs`, when appropriate. + datamodule + ``EXPERIMENTAL`` A :class:`~lightning.pytorch.core.LightningDataModule` instance to use + for training in place of the default :class:`~scvi.dataloaders.DataSplitter`. Can only + be passed in if the model was not initialized with :class:`~anndata.AnnData`. **trainer_kwargs Other keyword args for :class:`~scvi.train.Trainer`. """ @@ -406,17 +442,24 @@ def train( datasplitter_kwargs = datasplitter_kwargs or {} # if we have labeled cells, we want to subsample labels each epoch - sampler_callback = [SubSampleLabels()] if len(self._labeled_indices) != 0 else [] - - data_splitter = SemiSupervisedDataSplitter( - adata_manager=self.adata_manager, - train_size=train_size, - validation_size=validation_size, - shuffle_set_split=shuffle_set_split, - n_samples_per_label=n_samples_per_label, - batch_size=batch_size, - **datasplitter_kwargs, - ) + if datamodule is None: + sampler_callback = [SubSampleLabels()] if len(self._labeled_indices) != 0 else [] + # In the general case we enter here + datasplitter_kwargs = datasplitter_kwargs or {} + datamodule = SemiSupervisedDataSplitter( + adata_manager=self.adata_manager, + datamodule=datamodule, + train_size=train_size, + validation_size=validation_size, + shuffle_set_split=shuffle_set_split, + n_samples_per_label=n_samples_per_label, + batch_size=batch_size, + **datasplitter_kwargs, + ) + else: + # TODO fix in external dataloader? + sampler_callback = [] + training_plan = self._training_plan_cls(self.module, self.n_labels, **plan_kwargs) if "callbacks" in trainer_kwargs.keys(): trainer_kwargs["callbacks"] + [sampler_callback] @@ -426,7 +469,7 @@ def train( runner = TrainRunner( self, training_plan=training_plan, - data_splitter=data_splitter, + data_splitter=datamodule, max_epochs=max_epochs, accelerator=accelerator, devices=devices, @@ -475,9 +518,117 @@ def setup_anndata( NumericalJointObsField(REGISTRY_KEYS.CONT_COVS_KEY, continuous_covariate_keys), ] # register new fields if the adata is minified - adata_minify_type = _get_adata_minify_type(adata) - if adata_minify_type is not None and use_minified: - anndata_fields += cls._get_fields_for_adata_minification(adata_minify_type) - adata_manager = AnnDataManager(fields=anndata_fields, setup_method_args=setup_method_args) - adata_manager.register_fields(adata, **kwargs) - cls.register_manager(adata_manager) + if adata: + adata_minify_type = _get_adata_minify_type(adata) + if adata_minify_type is not None and use_minified: + anndata_fields += cls._get_fields_for_adata_minification(adata_minify_type) + adata_manager = AnnDataManager( + fields=anndata_fields, setup_method_args=setup_method_args + ) + adata_manager.register_fields(adata, **kwargs) + cls.register_manager(adata_manager) + + @classmethod + @setup_anndata_dsp.dedent + def setup_datamodule( + cls, + datamodule: LightningDataModule | None = None, + source_registry=None, + layer: str | None = None, + batch_key: list[str] | None = None, + labels_key: str | None = None, + size_factor_key: str | None = None, + categorical_covariate_keys: list[str] | None = None, + continuous_covariate_keys: list[str] | None = None, + **kwargs, + ): + """%(summary)s. + + Parameters + ---------- + %(param_datamodule)s + %(param_source_registry)s + %(param_layer)s + %(param_batch_key)s + %(param_size_factor_key)s + %(param_cat_cov_keys)s + %(param_cont_cov_keys)s + """ + if datamodule.__class__.__name__ == "CensusSCVIDataModule": + # CZI + batch_mapping = datamodule.datapipe.obs_encoders["batch"].classes_ + labels_mapping = datamodule.datapipe.obs_encoders["label"].classes_ + features_names = list( + datamodule.datapipe.var_query.coords[0] + if datamodule.datapipe.var_query is not None + else range(datamodule.n_vars) + ) + n_batch = datamodule.n_batch + n_label = datamodule.n_label + + else: + # Anndata -> CZI + # if we are here and datamodule is actually an AnnData object + # it means we init the custom dataloder model with anndata + batch_mapping = source_registry["field_registries"]["batch"]["state_registry"][ + "categorical_mapping" + ] + labels_mapping = source_registry["field_registries"]["label"]["state_registry"][ + "categorical_mapping" + ] + features_names = datamodule.var.soma_joinid.values + n_batch = source_registry["field_registries"]["batch"]["summary_stats"]["n_batch"] + n_label = 1 # need to change + + datamodule.registry = { + "scvi_version": scvi.__version__, + "model_name": "SCVI", + "setup_args": { + "layer": layer, + "batch_key": batch_key, + "labels_key": labels_key, + "size_factor_key": size_factor_key, + "categorical_covariate_keys": categorical_covariate_keys, + "continuous_covariate_keys": continuous_covariate_keys, + }, + "field_registries": { + "X": { + "data_registry": {"attr_name": "X", "attr_key": None}, + "state_registry": { + "n_obs": datamodule.n_obs, + "n_vars": datamodule.n_vars, + "column_names": [str(i) for i in features_names], + }, + "summary_stats": {"n_vars": datamodule.n_vars, "n_cells": datamodule.n_obs}, + }, + "batch": { + "data_registry": {"attr_name": "obs", "attr_key": "_scvi_batch"}, + "state_registry": { + "categorical_mapping": batch_mapping, + "original_key": "batch", + }, + "summary_stats": {"n_batch": n_batch}, + }, + "labels": { + "data_registry": {"attr_name": "obs", "attr_key": "_scvi_labels"}, + "state_registry": { + "categorical_mapping": labels_mapping, + "original_key": "label", + "unlabeled_category": datamodule.unlabeled_category, + }, + "summary_stats": {"n_labels": n_label}, + }, + "size_factor": {"data_registry": {}, "state_registry": {}, "summary_stats": {}}, + "extra_categorical_covs": { + "data_registry": {}, + "state_registry": {}, + "summary_stats": {"n_extra_categorical_covs": 0}, + }, + "extra_continuous_covs": { + "data_registry": {}, + "state_registry": {}, + "summary_stats": {"n_extra_continuous_covs": 0}, + }, + }, + "setup_method_name": "setup_datamodule", + } diff --git a/src/scvi/model/_scvi.py b/src/scvi/model/_scvi.py index 1eb23aa138..f89c3723c2 100644 --- a/src/scvi/model/_scvi.py +++ b/src/scvi/model/_scvi.py @@ -4,6 +4,9 @@ import warnings from typing import TYPE_CHECKING +import numpy as np + +import scvi from scvi import REGISTRY_KEYS, settings from scvi.data import AnnDataManager from scvi.data._constants import ADATA_MINIFY_TYPE @@ -26,6 +29,12 @@ from typing import Literal from anndata import AnnData + from lightning import LightningDataModule + + +_SCVI_LATENT_QZM = "_scvi_latent_qzm" +_SCVI_LATENT_QZV = "_scvi_latent_qzv" +_SCVI_OBSERVED_LIB_SIZE = "_scvi_observed_lib_size" logger = logging.getLogger(__name__) @@ -69,6 +78,9 @@ class SCVI( * ``'zinb'`` - Zero-inflated negative binomial distribution * ``'poisson'`` - Poisson distribution * ``'normal'`` - ``EXPERIMENTAL`` Normal distribution + use_observed_lib_size + If ``True``, use the observed library size for RNA as the scaling factor in the mean of the + conditional distribution. latent_distribution One of: @@ -106,17 +118,19 @@ class SCVI( def __init__( self, - adata: AnnData | None = None, + adata: AnnData | None, + registry: dict | None = None, n_hidden: int = 128, n_latent: int = 10, n_layers: int = 1, dropout_rate: float = 0.1, dispersion: Literal["gene", "gene-batch", "gene-label", "gene-cell"] = "gene", gene_likelihood: Literal["zinb", "nb", "poisson", "normal"] = "zinb", + use_observed_lib_size: bool = True, latent_distribution: Literal["normal", "ln"] = "normal", **kwargs, ): - super().__init__(adata) + super().__init__(adata, registry) self._module_kwargs = { "n_hidden": n_hidden, @@ -134,6 +148,7 @@ def __init__( f"dropout_rate: {dropout_rate}, dispersion: {dispersion}, " f"gene_likelihood: {gene_likelihood}, latent_distribution: {latent_distribution}." ) + self._module_init_on_train = False if self._module_init_on_train: self.module = None @@ -144,17 +159,29 @@ def __init__( stacklevel=settings.warnings_stacklevel, ) else: - n_cats_per_cov = ( - self.adata_manager.get_state_registry(REGISTRY_KEYS.CAT_COVS_KEY).n_cats_per_key - if REGISTRY_KEYS.CAT_COVS_KEY in self.adata_manager.data_registry - else None - ) + if adata is not None: + n_cats_per_cov = ( + self.adata_manager.get_state_registry( + REGISTRY_KEYS.CAT_COVS_KEY + ).n_cats_per_key + if REGISTRY_KEYS.CAT_COVS_KEY in self.adata_manager.data_registry + else None + ) + else: + # custom datamodule + n_cats_per_cov = self.summary_stats[f"n_{REGISTRY_KEYS.CAT_COVS_KEY}"] + if n_cats_per_cov == 0: + n_cats_per_cov = None + n_batch = self.summary_stats.n_batch - use_size_factor_key = REGISTRY_KEYS.SIZE_FACTOR_KEY in self.adata_manager.data_registry + use_size_factor_key = self.registry_["setup_args"][ + f"{REGISTRY_KEYS.SIZE_FACTOR_KEY}_key" + ] library_log_means, library_log_vars = None, None if ( not use_size_factor_key and self.minified_data_type != ADATA_MINIFY_TYPE.LATENT_POSTERIOR + and not use_observed_lib_size ): library_log_means, library_log_vars = _init_library_size( self.adata_manager, n_batch @@ -171,6 +198,7 @@ def __init__( dropout_rate=dropout_rate, dispersion=dispersion, gene_likelihood=gene_likelihood, + use_observed_lib_size=use_observed_lib_size, latent_distribution=latent_distribution, use_size_factor_key=use_size_factor_key, library_log_means=library_log_means, @@ -222,3 +250,98 @@ def setup_anndata( adata_manager = AnnDataManager(fields=anndata_fields, setup_method_args=setup_method_args) adata_manager.register_fields(adata, **kwargs) cls.register_manager(adata_manager) + + @classmethod + @setup_anndata_dsp.dedent + def setup_datamodule( + cls, + datamodule: LightningDataModule | None = None, + source_registry=None, + layer: str | None = None, + batch_key: list[str] | None = None, + labels_key: str | None = None, + size_factor_key: str | None = None, + categorical_covariate_keys: list[str] | None = None, + continuous_covariate_keys: list[str] | None = None, + **kwargs, + ): + """%(summary)s. + + Parameters + ---------- + %(param_datamodule)s + %(param_source_registry)s + %(param_layer)s + %(param_batch_key)s + %(param_labels_key)s + %(param_size_factor_key)s + %(param_cat_cov_keys)s + %(param_cont_cov_keys)s + """ + if datamodule.__class__.__name__ == "CensusSCVIDataModule": + # CZI + categorical_mapping = datamodule.datapipe.obs_encoders["batch"].classes_ + column_names = list( + datamodule.datapipe.var_query.coords[0] + if datamodule.datapipe.var_query is not None + else range(datamodule.n_vars) + ) + n_batch = datamodule.n_batch + else: + categorical_mapping = source_registry["field_registries"]["batch"]["state_registry"][ + "categorical_mapping" + ] + column_names = datamodule.var_names + n_batch = source_registry["field_registries"]["batch"]["summary_stats"]["n_batch"] + + datamodule.registry = { + "scvi_version": scvi.__version__, + "model_name": "SCVI", + "setup_args": { + "layer": layer, + "batch_key": batch_key, + "labels_key": labels_key, + "size_factor_key": size_factor_key, + "categorical_covariate_keys": categorical_covariate_keys, + "continuous_covariate_keys": continuous_covariate_keys, + }, + "field_registries": { + "X": { + "data_registry": {"attr_name": "X", "attr_key": None}, + "state_registry": { + "n_obs": datamodule.n_obs, + "n_vars": datamodule.n_vars, + "column_names": [str(i) for i in column_names], + }, + "summary_stats": {"n_vars": datamodule.n_vars, "n_cells": datamodule.n_obs}, + }, + "batch": { + "data_registry": {"attr_name": "obs", "attr_key": "_scvi_batch"}, + "state_registry": { + "categorical_mapping": categorical_mapping, + "original_key": "batch", + }, + "summary_stats": {"n_batch": n_batch}, + }, + "labels": { + "data_registry": {"attr_name": "obs", "attr_key": "_scvi_labels"}, + "state_registry": { + "categorical_mapping": np.array([0]), + "original_key": "_scvi_labels", + }, + "summary_stats": {"n_labels": 1}, + }, + "size_factor": {"data_registry": {}, "state_registry": {}, "summary_stats": {}}, + "extra_categorical_covs": { + "data_registry": {}, + "state_registry": {}, + "summary_stats": {"n_extra_categorical_covs": 0}, + }, + "extra_continuous_covs": { + "data_registry": {}, + "state_registry": {}, + "summary_stats": {"n_extra_continuous_covs": 0}, + }, + }, + "setup_method_name": "setup_datamodule", + } diff --git a/src/scvi/model/_totalvi.py b/src/scvi/model/_totalvi.py index af0659faec..c1ed6e6a6c 100644 --- a/src/scvi/model/_totalvi.py +++ b/src/scvi/model/_totalvi.py @@ -124,6 +124,7 @@ class TOTALVI( def __init__( self, adata: AnnOrMuData, + registry: dict | None = None, n_latent: int = 20, gene_dispersion: Literal["gene", "gene-batch", "gene-label", "gene-cell"] = "gene", protein_dispersion: Literal["protein", "protein-batch", "protein-label"] = "protein", @@ -1223,7 +1224,8 @@ def get_protein_background_mean(self, adata, indices, batch_size): def setup_anndata( cls, adata: AnnData, - protein_expression_obsm_key: str, + registry: dict | None = None, + protein_expression_obsm_key: str | None = None, protein_names_uns_key: str | None = None, batch_key: str | None = None, layer: str | None = None, diff --git a/src/scvi/model/base/_archesmixin.py b/src/scvi/model/base/_archesmixin.py index c490360c2e..8f885d480c 100644 --- a/src/scvi/model/base/_archesmixin.py +++ b/src/scvi/model/base/_archesmixin.py @@ -1,6 +1,9 @@ +from __future__ import annotations + import logging import warnings from copy import deepcopy +from typing import TYPE_CHECKING import anndata import numpy as np @@ -11,12 +14,10 @@ from scipy.sparse import csr_matrix from scvi import settings -from scvi._types import AnnOrMuData from scvi.data import _constants from scvi.data._constants import _MODEL_NAME_KEY, _SETUP_ARGS_KEY, _SETUP_METHOD_NAME from scvi.model._utils import parse_device_args from scvi.model.base._save_load import ( - _get_var_names, _initialize_model, _load_saved_files, _validate_var_names, @@ -24,7 +25,10 @@ from scvi.nn import FCLayers from scvi.utils._docstrings import devices_dsp -from ._base_model import BaseModelClass +if TYPE_CHECKING: + from scvi._types import AnnOrMuData + + from ._base_model import BaseModelClass logger = logging.getLogger(__name__) @@ -38,8 +42,9 @@ class ArchesMixin: @devices_dsp.dedent def load_query_data( cls, - adata: AnnOrMuData, - reference_model: str | BaseModelClass, + adata: AnnOrMuData = None, + reference_model: str | BaseModelClass = None, + registry: dict = None, inplace_subset_query_vars: bool = False, accelerator: str = "auto", device: int | str = "auto", @@ -82,6 +87,11 @@ def load_query_data( freeze_classifier Whether to freeze classifier completely. Only applies to `SCANVI`. """ + if reference_model is None: + raise ValueError("Please provide a reference model as string or loaded model.") + if adata is None and registry is None: + raise ValueError("Please provide either an AnnData or a registry dictionary.") + _, _, device = parse_device_args( accelerator=accelerator, devices=device, @@ -89,49 +99,52 @@ def load_query_data( validate_single_device=True, ) - attr_dict, var_names, load_state_dict = _get_loaded_data(reference_model, device=device) + attr_dict, var_names, load_state_dict = _get_loaded_data( + reference_model, device=device, adata=adata + ) + + if adata: + if isinstance(adata, MuData): + for modality in adata.mod: + if inplace_subset_query_vars: + logger.debug(f"Subsetting {modality} query vars to reference vars.") + adata[modality]._inplace_subset_var(var_names[modality]) + _validate_var_names(adata[modality], var_names[modality]) - if isinstance(adata, MuData): - for modality in adata.mod: + else: if inplace_subset_query_vars: - logger.debug(f"Subsetting {modality} query vars to reference vars.") - adata[modality]._inplace_subset_var(var_names[modality]) - _validate_var_names(adata[modality], var_names[modality]) + logger.debug("Subsetting query vars to reference vars.") + adata._inplace_subset_var(var_names) + _validate_var_names(adata, var_names) - else: if inplace_subset_query_vars: logger.debug("Subsetting query vars to reference vars.") adata._inplace_subset_var(var_names) _validate_var_names(adata, var_names) - if inplace_subset_query_vars: - logger.debug("Subsetting query vars to reference vars.") - adata._inplace_subset_var(var_names) - _validate_var_names(adata, var_names) + registry = attr_dict.pop("registry_") + if _MODEL_NAME_KEY in registry and registry[_MODEL_NAME_KEY] != cls.__name__: + raise ValueError("It appears you are loading a model from a different class.") - registry = attr_dict.pop("registry_") - if _MODEL_NAME_KEY in registry and registry[_MODEL_NAME_KEY] != cls.__name__: - raise ValueError("It appears you are loading a model from a different class.") + if _SETUP_ARGS_KEY not in registry: + raise ValueError( + "Saved model does not contain original setup inputs. " + "Cannot load the original setup." + ) - if _SETUP_ARGS_KEY not in registry: - raise ValueError( - "Saved model does not contain original setup inputs. " - "Cannot load the original setup." + setup_method = getattr(cls, registry[_SETUP_METHOD_NAME]) + setup_method( + adata, + source_registry=registry, + extend_categories=True, + allow_missing_labels=True, + **registry[_SETUP_ARGS_KEY], ) - setup_method = getattr(cls, registry[_SETUP_METHOD_NAME]) - setup_method( - adata, - source_registry=registry, - extend_categories=True, - allow_missing_labels=True, - **registry[_SETUP_ARGS_KEY], - ) + model = _initialize_model(cls, adata, registry, attr_dict) - model = _initialize_model(cls, adata, attr_dict) - adata_manager = model.get_anndata_manager(adata, required=True) + version_split = model.registry[_constants._SCVI_VERSION_KEY].split(".") - version_split = adata_manager.registry[_constants._SCVI_VERSION_KEY].split(".") if int(version_split[1]) < 8 and int(version_split[0]) == 0: warnings.warn( "Query integration should be performed using models trained with " @@ -202,7 +215,7 @@ def prepare_query_anndata( Query adata ready to use in `load_query_data` unless `return_reference_var_names` in which case a pd.Index of reference var names is returned. """ - _, var_names, _ = _get_loaded_data(reference_model, device="cpu") + _, var_names, _ = _get_loaded_data(reference_model, device="cpu", adata=adata) var_names = pd.Index(var_names) if return_reference_var_names: @@ -350,7 +363,7 @@ def requires_grad(key): par.requires_grad = False -def _get_loaded_data(reference_model, device=None): +def _get_loaded_data(reference_model, device=None, adata=None): if isinstance(reference_model, str): attr_dict, var_names, load_state_dict, _ = _load_saved_files( reference_model, load_adata=False, map_location=device @@ -358,7 +371,7 @@ def _get_loaded_data(reference_model, device=None): else: attr_dict = reference_model._get_user_attributes() attr_dict = {a[0]: a[1] for a in attr_dict if a[0][-1] == "_"} - var_names = _get_var_names(reference_model.adata) + var_names = reference_model.get_var_names() load_state_dict = deepcopy(reference_model.module.state_dict()) return attr_dict, var_names, load_state_dict diff --git a/src/scvi/model/base/_base_model.py b/src/scvi/model/base/_base_model.py index aaafca9dcc..ab81938446 100644 --- a/src/scvi/model/base/_base_model.py +++ b/src/scvi/model/base/_base_model.py @@ -3,8 +3,10 @@ import inspect import logging import os +import sys import warnings from abc import ABCMeta, abstractmethod +from io import StringIO from typing import TYPE_CHECKING from uuid import uuid4 @@ -13,19 +15,29 @@ import torch from anndata import AnnData from mudata import MuData +from rich import box +from rich.console import Console from scvi import REGISTRY_KEYS, settings from scvi.data import AnnDataManager, fields from scvi.data._compat import registry_from_setup_dict from scvi.data._constants import ( _ADATA_MINIFY_TYPE_UNS_KEY, + _FIELD_REGISTRIES_KEY, _MODEL_NAME_KEY, _SCVI_UUID_KEY, + _SCVI_VERSION_KEY, _SETUP_ARGS_KEY, _SETUP_METHOD_NAME, + _STATE_REGISTRY_KEY, ADATA_MINIFY_TYPE, ) -from scvi.data._utils import _assign_adata_uuid, _check_if_view, _get_adata_minify_type +from scvi.data._utils import ( + _assign_adata_uuid, + _check_if_view, + _get_adata_minify_type, + _get_summary_stats_from_registry, +) from scvi.dataloaders import AnnDataLoader from scvi.model._utils import parse_device_args from scvi.model.base._constants import SAVE_KEYS @@ -39,9 +51,13 @@ from scvi.utils import attrdict, setup_anndata_dsp from scvi.utils._docstrings import devices_dsp +from . import _constants + if TYPE_CHECKING: from collections.abc import Sequence + import pandas as pd + from scvi._types import AnnOrMuData, MinifiedDataType logger = logging.getLogger(__name__) @@ -93,7 +109,7 @@ class BaseModelClass(metaclass=BaseModelMetaClass): _OBSERVED_LIB_SIZE_KEY = "observed_lib_size" _data_loader_cls = AnnDataLoader - def __init__(self, adata: AnnOrMuData | None = None): + def __init__(self, adata: AnnOrMuData | None = None, registry: object | None = None): # check if the given adata is minified and check if the model being created # supports minified-data mode (i.e. inherits from the abstract BaseMinifiedModeModelClass). # If not, raise an error to inform the user of the lack of minified-data functionality @@ -109,8 +125,19 @@ def __init__(self, adata: AnnOrMuData | None = None): self._adata_manager = self._get_most_recent_anndata_manager(adata, required=True) self._register_manager_for_instance(self.adata_manager) # Suffix registry instance variable with _ to include it when saving the model. - self.registry_ = self._adata_manager.registry - self.summary_stats = self._adata_manager.summary_stats + self.registry_ = self._adata_manager._registry + self.summary_stats = _get_summary_stats_from_registry(self.registry_) + elif registry is not None: + self._adata = None + self._adata_manager = None + # Suffix registry instance variable with _ to include it when saving the model. + self.registry_ = registry + self.summary_stats = _get_summary_stats_from_registry(registry) + elif self.__class__.__name__ == "GIMVI": + # note some models do accept empty registry/adata (e.g: gimvi) + pass + else: + raise ValueError("adata or registry must be provided.") self._module_init_on_train = adata is None self.is_trained_ = False @@ -121,10 +148,24 @@ def __init__(self, adata: AnnOrMuData | None = None): self.history_ = None @property - def adata(self) -> AnnOrMuData: + def adata(self) -> None | AnnOrMuData: """Data attached to model instance.""" return self._adata + @property + def registry(self) -> dict: + """Data attached to model instance.""" + return self.registry_ + + def get_var_names(self, legacy_mudata_format=False) -> dict: + """Variable names of input data.""" + from scvi.model.base._save_load import _get_var_names + + if self.adata: + return _get_var_names(self.adata, legacy_mudata_format=legacy_mudata_format) + else: + return self.registry[_FIELD_REGISTRIES_KEY]["X"][_STATE_REGISTRY_KEY]["column_names"] + @adata.setter def adata(self, adata: AnnOrMuData): if adata is None: @@ -246,6 +287,23 @@ def _register_manager_for_instance(self, adata_manager: AnnDataManager): instance_manager_store = self._per_instance_manager_store[self.id] instance_manager_store[adata_id] = adata_manager + def data_registry(self, registry_key: str) -> np.ndarray | pd.DataFrame: + """Returns the object in AnnData associated with the key in the data registry. + + Parameters + ---------- + registry_key + key of object to get from ``self.data_registry`` + + Returns + ------- + The requested data. + """ + if not self.adata: + raise ValueError("self.adata is None. Please register AnnData object to access data.") + else: + return self._adata_manager.get_from_registry(registry_key) + def deregister_manager(self, adata: AnnData | None = None): """Deregisters the :class:`~scvi.data.AnnDataManager` instance associated with `adata`. @@ -338,6 +396,9 @@ def get_anndata_manager( If True, errors on missing manager. Otherwise, returns None when manager is missing. """ cls = self.__class__ + if not adata: + return None + if _SCVI_UUID_KEY not in adata.uns: if required: raise ValueError( @@ -477,6 +538,13 @@ def _validate_anndata( return adata + def transfer_fields(self, adata: AnnOrMuData, **kwargs) -> AnnData: + """Transfer fields from a model to an AnnData object.""" + if self.adata: + return self.adata_manager.transfer_fields(adata, **kwargs) + else: + raise ValueError("Model need to be initialized with AnnData to transfer fields.") + def _check_if_trained(self, warn: bool = True, message: str = _UNTRAINED_WARNING_MESSAGE): """Check if the model is trained. @@ -539,7 +607,7 @@ def _get_user_attributes(self): def _get_init_params(self, locals): """Returns the model init signature with associated passed in values. - Ignores the initial AnnData. + Ignores the initial AnnData or Registry. """ init = self.__init__ sig = inspect.signature(init) @@ -550,7 +618,9 @@ def _get_init_params(self, locals): all_params = { k: v for (k, v) in all_params.items() - if not isinstance(v, AnnData) and not isinstance(v, MuData) + if not isinstance(v, AnnData) + and not isinstance(v, MuData) + and k not in ("adata", "registry") } # not very efficient but is explicit # separates variable params (**kwargs) from non variable params into two dicts @@ -605,8 +675,6 @@ def save( anndata_write_kwargs Kwargs for :meth:`~anndata.AnnData.write` """ - from scvi.model.base._save_load import _get_var_names - if not os.path.exists(dir_path) or overwrite: os.makedirs(dir_path, exist_ok=overwrite) else: @@ -633,7 +701,7 @@ def save( # save the model state dict and the trainer state dict only model_state_dict = self.module.state_dict() - var_names = _get_var_names(self.adata, legacy_mudata_format=legacy_mudata_format) + var_names = self.get_var_names(legacy_mudata_format=legacy_mudata_format) # get all the user attributes user_attributes = self._get_user_attributes() @@ -672,6 +740,7 @@ def load( It is not necessary to run setup_anndata, as AnnData is validated against the saved `scvi` setup dictionary. If None, will check for and load anndata saved with the model. + If False, will load the model without AnnData. %(param_accelerator)s %(param_device)s prefix @@ -710,31 +779,31 @@ def load( ) adata = new_adata if new_adata is not None else adata - _validate_var_names(adata, var_names) - registry = attr_dict.pop("registry_") if _MODEL_NAME_KEY in registry and registry[_MODEL_NAME_KEY] != cls.__name__: raise ValueError("It appears you are loading a model from a different class.") - if _SETUP_ARGS_KEY not in registry: - raise ValueError( - "Saved model does not contain original setup inputs. " - "Cannot load the original setup." - ) - # Calling ``setup_anndata`` method with the original arguments passed into # the saved model. This enables simple backwards compatibility in the case of # newly introduced fields or parameters. - method_name = registry.get(_SETUP_METHOD_NAME, "setup_anndata") - getattr(cls, method_name)(adata, source_registry=registry, **registry[_SETUP_ARGS_KEY]) + if adata: + if _SETUP_ARGS_KEY not in registry: + raise ValueError( + "Saved model does not contain original setup inputs. " + "Cannot load the original setup." + ) + _validate_var_names(adata, var_names) + method_name = registry.get(_SETUP_METHOD_NAME, "setup_anndata") + getattr(cls, method_name)(adata, source_registry=registry, **registry[_SETUP_ARGS_KEY]) - model = _initialize_model(cls, adata, attr_dict) + model = _initialize_model(cls, adata, registry, attr_dict) model.module.on_load(model) model.module.load_state_dict(model_state_dict) model.to_device(device) model.module.eval() - model._validate_anndata(adata) + if adata: + model._validate_anndata(adata) return model @classmethod @@ -890,6 +959,149 @@ def view_anndata_setup( ) from err adata_manager.view_registry(hide_state_registries=hide_state_registries) + def view_setup_method_args(self) -> None: + """Prints setup kwargs used to produce a given registry. + + Parameters + ---------- + registry + Registry produced by an AnnDataManager. + """ + model_name = self.registry_[_MODEL_NAME_KEY] + setup_args = self.registry_[_SETUP_ARGS_KEY] + if model_name is not None and setup_args is not None: + rich.print(f"Setup via `{model_name}.setup_anndata` with arguments:") + rich.pretty.pprint(setup_args) + rich.print() + + def view_registry(self, hide_state_registries: bool = False) -> None: + """Prints summary of the registry. + + Parameters + ---------- + hide_state_registries + If True, prints a shortened summary without details of each state registry. + """ + version = self.registry_[_SCVI_VERSION_KEY] + rich.print(f"Anndata setup with scvi-tools version {version}.") + rich.print() + self.view_setup_method_args(self._registry) + + in_colab = "google.colab" in sys.modules + force_jupyter = None if not in_colab else True + console = rich.console.Console(force_jupyter=force_jupyter) + + ss = _get_summary_stats_from_registry(self._registry) + dr = self._get_data_registry_from_registry(self._registry) + console.print(self._view_summary_stats(ss)) + console.print(self._view_data_registry(dr)) + + if not hide_state_registries: + for field in self.fields: + state_registry = self.get_state_registry(field.registry_key) + t = field.view_state_registry(state_registry) + if t is not None: + console.print(t) + + def get_state_registry(self, registry_key: str) -> attrdict: + """Returns the state registry for the AnnDataField registered with this instance.""" + return attrdict(self.registry_[_FIELD_REGISTRIES_KEY][registry_key][_STATE_REGISTRY_KEY]) + + def get_setup_arg(self, setup_arg: str) -> attrdict: + """Returns the string provided to setup of a specific setup_arg.""" + return self.registry_[_SETUP_ARGS_KEY][setup_arg] + + @staticmethod + def _view_summary_stats( + summary_stats: attrdict, as_markdown: bool = False + ) -> rich.table.Table | str: + """Prints summary stats.""" + if not as_markdown: + t = rich.table.Table(title="Summary Statistics") + else: + t = rich.table.Table(box=box.MARKDOWN) + + t.add_column( + "Summary Stat Key", + justify="center", + style="dodger_blue1", + no_wrap=True, + overflow="fold", + ) + t.add_column( + "Value", + justify="center", + style="dark_violet", + no_wrap=True, + overflow="fold", + ) + for stat_key, count in summary_stats.items(): + t.add_row(stat_key, str(count)) + + if as_markdown: + console = Console(file=StringIO(), force_jupyter=False) + console.print(t) + return console.file.getvalue().strip() + + return t + + @staticmethod + def _view_data_registry( + data_registry: attrdict, as_markdown: bool = False + ) -> rich.table.Table | str: + """Prints data registry.""" + if not as_markdown: + t = rich.table.Table(title="Data Registry") + else: + t = rich.table.Table(box=box.MARKDOWN) + + t.add_column( + "Registry Key", + justify="center", + style="dodger_blue1", + no_wrap=True, + overflow="fold", + ) + t.add_column( + "scvi-tools Location", + justify="center", + style="dark_violet", + no_wrap=True, + overflow="fold", + ) + + for registry_key, data_loc in data_registry.items(): + mod_key = getattr(data_loc, _constants._DR_MOD_KEY, None) + attr_name = data_loc.attr_name + attr_key = data_loc.attr_key + scvi_data_str = "adata" + if mod_key is not None: + scvi_data_str += f".mod['{mod_key}']" + if attr_key is None: + scvi_data_str += f".{attr_name}" + else: + scvi_data_str += f".{attr_name}['{attr_key}']" + t.add_row(registry_key, scvi_data_str) + + if as_markdown: + console = Console(file=StringIO(), force_jupyter=False) + console.print(t) + return console.file.getvalue().strip() + + return t + + def update_setup_method_args(self, setup_method_args: dict): + """Update setup method args. + + Parameters + ---------- + setup_method_args + This is a bit of a misnomer, this is a dict representing kwargs + of the setup method that will be used to update the existing values + in the registry of this instance. + """ + self._registry[_SETUP_ARGS_KEY].update(setup_method_args) + class BaseMinifiedModeModelClass(BaseModelClass): """Abstract base class for scvi-tools models that can handle minified data.""" @@ -897,11 +1109,14 @@ class BaseMinifiedModeModelClass(BaseModelClass): @property def minified_data_type(self) -> MinifiedDataType | None: """The type of minified data associated with this model, if applicable.""" - return ( - self.adata_manager.get_from_registry(REGISTRY_KEYS.MINIFY_TYPE_KEY) - if REGISTRY_KEYS.MINIFY_TYPE_KEY in self.adata_manager.data_registry - else None - ) + if self.adata_manager: + return ( + self.adata_manager.get_from_registry(REGISTRY_KEYS.MINIFY_TYPE_KEY) + if REGISTRY_KEYS.MINIFY_TYPE_KEY in self.adata_manager.data_registry + else None + ) + else: + return None def minify_adata( self, diff --git a/src/scvi/model/base/_save_load.py b/src/scvi/model/base/_save_load.py index c990f4880c..736c260f80 100644 --- a/src/scvi/model/base/_save_load.py +++ b/src/scvi/model/base/_save_load.py @@ -102,7 +102,7 @@ def _load_saved_files( return attr_dict, var_names, model_state_dict, adata -def _initialize_model(cls, adata, attr_dict): +def _initialize_model(cls, adata, registry, attr_dict): """Helper to initialize a model.""" if "init_params_" not in attr_dict.keys(): raise ValueError( @@ -133,7 +133,10 @@ def _initialize_model(cls, adata, attr_dict): if "pretrained_model" in non_kwargs.keys(): non_kwargs.pop("pretrained_model") - model = cls(adata, **non_kwargs, **kwargs) + if not adata: + adata = None + + model = cls(adata, registry=registry, **non_kwargs, **kwargs) for attr, val in attr_dict.items(): setattr(model, attr, val) @@ -177,7 +180,9 @@ def _get_var_names( def _validate_var_names( - adata: AnnOrMuData, source_var_names: npt.NDArray | dict[str, npt.NDArray] + adata: AnnOrMuData | None, + source_var_names: npt.NDArray | dict[str, npt.NDArray], + load_var_names: npt.NDArray | dict[str, npt.NDArray] | None = None, ) -> None: """Validate that source and loaded variable names match. @@ -188,15 +193,19 @@ def _validate_var_names( source_var_names Variable names from a saved model file corresponding to the variable names used during training. + load_var_names + Variable names from the loaded registry. """ from numpy import array_equal - is_anndata = isinstance(adata, AnnData) source_per_mod_var_names = isinstance(source_var_names, dict) - load_var_names = _get_var_names( - adata, - legacy_mudata_format=(not is_anndata and not source_per_mod_var_names), - ) + + if load_var_names is None: + is_anndata = isinstance(adata, AnnData) + load_var_names = _get_var_names( + adata, + legacy_mudata_format=(not is_anndata and not source_per_mod_var_names), + ) if source_per_mod_var_names: valid_load_var_names = all( @@ -208,7 +217,7 @@ def _validate_var_names( if not valid_load_var_names: warnings.warn( - "`var_names` for the loaded `adata` does not match those of the `adata` used to " + "`var_names` for the loaded `model` does not match those used to " "train the model. For valid results, the former should match the latter.", UserWarning, stacklevel=settings.warnings_stacklevel, diff --git a/src/scvi/model/base/_training_mixin.py b/src/scvi/model/base/_training_mixin.py index ebace98445..e27727c029 100644 --- a/src/scvi/model/base/_training_mixin.py +++ b/src/scvi/model/base/_training_mixin.py @@ -84,27 +84,14 @@ def train( **kwargs Additional keyword arguments passed into :class:`~scvi.train.Trainer`. """ - if datamodule is not None and not self._module_init_on_train: - raise ValueError( - "Cannot pass in `datamodule` if the model was initialized with `adata`." - ) - elif datamodule is None and self._module_init_on_train: - raise ValueError( - "If the model was not initialized with `adata`, a `datamodule` must be passed in." - ) - if max_epochs is None: - if datamodule is None: + if self.adata is not None: max_epochs = get_max_epochs_heuristic(self.adata.n_obs) - elif hasattr(datamodule, "n_obs"): - max_epochs = get_max_epochs_heuristic(datamodule.n_obs) else: - raise ValueError( - "If `datamodule` does not have `n_obs` attribute, `max_epochs` must be " - "passed in." - ) + max_epochs = get_max_epochs_heuristic(self.summary_stats.n_obs) if datamodule is None: + # In the general case we enter here datasplitter_kwargs = datasplitter_kwargs or {} datamodule = self._data_splitter_cls( self.adata_manager, @@ -116,15 +103,6 @@ def train( load_sparse_tensor=load_sparse_tensor, **datasplitter_kwargs, ) - elif self.module is None: - self.module = self._module_cls( - datamodule.n_vars, - n_batch=datamodule.n_batch, - n_labels=getattr(datamodule, "n_labels", 1), - n_continuous_cov=getattr(datamodule, "n_continuous_cov", 0), - n_cats_per_cov=getattr(datamodule, "n_cats_per_cov", None), - **self._module_kwargs, - ) plan_kwargs = plan_kwargs or {} training_plan = self._training_plan_cls(self.module, **plan_kwargs) diff --git a/src/scvi/module/_scanvae.py b/src/scvi/module/_scanvae.py index 55c7d0a8a0..fb2a0a178d 100644 --- a/src/scvi/module/_scanvae.py +++ b/src/scvi/module/_scanvae.py @@ -66,6 +66,9 @@ class SCANVAE(VAE): * ``'nb'`` - Negative binomial distribution * ``'zinb'`` - Zero-inflated negative binomial distribution + use_observed_lib_size + If ``True``, use the observed library size for RNA as the scaling factor in the mean of the + conditional distribution. y_prior If None, initialized to uniform probability over cell types labels_groups @@ -102,6 +105,7 @@ def __init__( dispersion: Literal["gene", "gene-batch", "gene-label", "gene-cell"] = "gene", log_variational: bool = True, gene_likelihood: Literal["zinb", "nb"] = "zinb", + use_observed_lib_size: bool = True, y_prior: torch.Tensor | None = None, labels_groups: Sequence[int] = None, use_labels_groups: bool = False, @@ -123,6 +127,7 @@ def __init__( dispersion=dispersion, log_variational=log_variational, gene_likelihood=gene_likelihood, + use_observed_lib_size=use_observed_lib_size, use_batch_norm=use_batch_norm, use_layer_norm=use_layer_norm, **vae_kwargs, diff --git a/tests/conftest.py b/tests/conftest.py index 6ef9467efc..2c6d29d1e1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,6 +21,12 @@ def pytest_addoption(parser): default=False, help="Run tests that are desinged for multiGPU.", ) + parser.addoption( + "--custom-dataloader-tests", + action="store_true", + default=False, + help="Run tests that deals with custom dataloaders. This increases test time.", + ) parser.addoption( "--optional", action="store_true", @@ -72,6 +78,23 @@ def pytest_collection_modifyitems(config, items): elif run_internet and ("internet" not in item.keywords): item.add_marker(skip_non_internet) + run_custom_dataloader = config.getoption("--custom-dataloader-tests") + skip_custom_dataloader = pytest.mark.skip( + reason="need ---custom-dataloader-tests option to run" + ) + skip_non_custom_dataloader = pytest.mark.skip( + reason="test not having a pytest.mark.custom_dataloader decorator" + ) + for item in items: + # All tests marked with `pytest.mark.custom_dataloader` get skipped unless + # `--custom_dataloader-tests` passed + if not run_custom_dataloader and ("dataloader" in item.keywords): + item.add_marker(skip_custom_dataloader) + # Skip all tests not marked with `pytest.mark.custom_dataloader` + # if `--custom-dataloader-tests` passed + elif run_internet and ("dataloader" not in item.keywords): + item.add_marker(skip_non_custom_dataloader) + run_optional = config.getoption("--optional") skip_optional = pytest.mark.skip(reason="need --optional option to run") skip_non_optional = pytest.mark.skip(reason="test not having a pytest.mark.optional decorator") diff --git a/tests/dataloaders/test.ipynb b/tests/dataloaders/test.ipynb new file mode 100644 index 0000000000..0eef7523ed --- /dev/null +++ b/tests/dataloaders/test.ipynb @@ -0,0 +1,3818 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[93m!\u001b[0m schema module 'ourprojects' is not installed → no access to its labels & registries (resolve via `pip install ourprojects`)\n", + "\u001b[92m→\u001b[0m connected lamindb: laminlabs/cellxgene\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/canergen/.local/share/hatch/env/virtual/scvi-tools/SQ6yRUPo/scvi-tools/lib/python3.12/site-packages/docrep/decorators.py:43: SyntaxWarning: 'param_datamodule' is not a valid key!\n", + " doc = func(self, args[0].__doc__, *args[1:], **kwargs)\n", + "/home/canergen/.local/share/hatch/env/virtual/scvi-tools/SQ6yRUPo/scvi-tools/lib/python3.12/site-packages/docrep/decorators.py:43: SyntaxWarning: 'param_source_registry' is not a valid key!\n", + " doc = func(self, args[0].__doc__, *args[1:], **kwargs)\n" + ] + } + ], + "source": [ + "from __future__ import annotations\n", + "\n", +<<<<<<< HEAD + "import os\n", + "import sys\n", + "from pprint import pprint\n", + "\n", + "# import numpy as np\n", + "import lamindb as ln\n", + "import numpy as np\n", + "import pandas as pd\n", + "import psutil\n", + "import pytest\n", + "import scanpy as sc\n", + "import tqdm\n", + "from lightning.pytorch import LightningDataModule\n", + "from torch.utils.data import DataLoader\n", + "\n", + "import scvi\n", + "from scvi.data import synthetic_iid" +======= + "# import numpy as np\n", + "import lamindb as ln\n", + "import numpy as np\n", + "import psutil\n", + "from lightning.pytorch import LightningDataModule\n", + "from torch.utils.data import DataLoader\n", + "\n", + "import scvi" +>>>>>>> 1a4c796846258a0f21cb9aa1975df079a79d3509 + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "class MappedCollectionDataModule(LightningDataModule):\n", + " def __init__(\n", + " self,\n", + " collection: ln.Collection,\n", + " batch_key: str | None = None,\n", + " label_key: str | None = None,\n", + " batch_size: int = 128,\n", +<<<<<<< HEAD + " **kwargs\n", +======= + " **kwargs,\n", +>>>>>>> 1a4c796846258a0f21cb9aa1975df079a79d3509 + " ):\n", + " self._batch_size = batch_size\n", + " self._batch_key = batch_key\n", + " self._label_key = label_key\n", + " self._parallel = kwargs.pop(\"parallel\", True)\n", + " # here we initialize MappedCollection to use in a pytorch DataLoader\n", + " self._dataset = collection.mapped(\n", +<<<<<<< HEAD + " obs_keys=[self._batch_key, self._label_key],\n", + " parallel=self._parallel,\n", + " **kwargs\n", +======= + " obs_keys=[self._batch_key, self._label_key], parallel=self._parallel, **kwargs\n", +>>>>>>> 1a4c796846258a0f21cb9aa1975df079a79d3509 + " )\n", + " # need by scvi and lightning.pytorch\n", + " self._log_hyperparams = False\n", + " self.allow_zero_length_dataloader_with_multiple_devices = False\n", + "\n", + " def close(self):\n", + " self._dataset.close()\n", + "\n", + " def setup(self, stage):\n", + " pass\n", + "\n", + " def train_dataloader(self):\n", + " return self._create_dataloader(shuffle=True)\n", + "\n", + " def inference_dataloader(self):\n", + " \"\"\"Dataloader for inference with `on_before_batch_transfer` applied.\"\"\"\n", + " dataloader = self._create_dataloader(shuffle=False, batch_size=4096)\n", + " return self._InferenceDataloader(dataloader, self.on_before_batch_transfer)\n", + "\n", + " def _create_dataloader(self, shuffle, batch_size=None):\n", + " if self._parallel:\n", + " num_workers = psutil.cpu_count() - 1\n", + " worker_init_fn = self._dataset.torch_worker_init_fn\n", + " else:\n", + " num_workers = 0\n", + " worker_init_fn = None\n", + " if batch_size is None:\n", + " batch_size = self._batch_size\n", + " return DataLoader(\n", + " self._dataset,\n", + " batch_size=batch_size,\n", + " shuffle=shuffle,\n", + " num_workers=num_workers,\n", + " worker_init_fn=worker_init_fn,\n", + " )\n", + "\n", + " @property\n", + " def n_obs(self) -> int:\n", + " return self._dataset.n_obs\n", + "\n", + " @property\n", + " def var_names(self) -> int:\n", + " return self._dataset.var_joint\n", + "\n", + " @property\n", + " def n_vars(self) -> int:\n", + " return self._dataset.n_vars\n", + "\n", + " @property\n", + " def n_batch(self) -> int:\n", + " if self._batch_key is None:\n", + " return 1\n", + " return len(self._dataset.encoders[self._batch_key])\n", + "\n", + " @property\n", + " def n_labels(self) -> int:\n", + " if self._label_key is None:\n", + " return 1\n", + " return len(self._dataset.encoders[self._label_key])\n", + "\n", + " @property\n", + " def labels(self) -> np.ndarray:\n", + " return self._dataset[self._label_key]\n", + "\n", + " @property\n", + " def registry(self) -> dict:\n", +<<<<<<< HEAD + " return{\n", + " 'scvi_version': scvi.__version__,\n", + " 'model_name': 'SCVI',\n", + " 'setup_args': {\n", + " 'layer': None,\n", + " 'batch_key': self._batch_key,\n", + " 'labels_key': self._label_key,\n", + " 'size_factor_key': None,\n", + " 'categorical_covariate_keys': None,\n", + " 'continuous_covariate_keys': None,\n", + " },\n", + " 'field_registries': {\n", + " 'X': {'data_registry': {'attr_name': 'X', 'attr_key': None},\n", + " 'state_registry': {'n_obs': self.n_obs,\n", + " 'n_vars': self.n_vars,\n", + " 'column_names': self.var_names},\n", + " 'summary_stats': {'n_vars': self.n_vars, 'n_cells': self.n_obs}},\n", + " 'batch': {'data_registry': {'attr_name': 'obs',\n", + " 'attr_key': '_scvi_batch'},\n", + " 'state_registry': {'categorical_mapping': self.batch_keys,\n", + " 'original_key': self._batch_key},\n", + " 'summary_stats': {'n_batch': self.n_batch}},\n", + " 'labels': {'data_registry': {'attr_name': 'obs',\n", + " 'attr_key': '_scvi_labels'},\n", + " 'state_registry': {'categorical_mapping': self.label_keys,\n", + " 'original_key': self._label_key,\n", + " 'unlabeled_category': 'unlabeled'},\n", + " 'summary_stats': {'n_labels': self.n_labels}},\n", + " 'size_factor': {'data_registry': {},\n", + " 'state_registry': {},\n", + " 'summary_stats': {}},\n", + " 'extra_categorical_covs': {'data_registry': {},\n", + " 'state_registry': {},\n", + " 'summary_stats': {'n_extra_categorical_covs': 0}},\n", + " 'extra_continuous_covs': {'data_registry': {},\n", + " 'state_registry': {},\n", + " 'summary_stats': {'n_extra_continuous_covs': 0}}\n", + " },\n", + " 'setup_method_name': 'setup_anndata',\n", +======= + " return {\n", + " \"scvi_version\": scvi.__version__,\n", + " \"model_name\": \"SCVI\",\n", + " \"setup_args\": {\n", + " \"layer\": None,\n", + " \"batch_key\": self._batch_key,\n", + " \"labels_key\": self._label_key,\n", + " \"size_factor_key\": None,\n", + " \"categorical_covariate_keys\": None,\n", + " \"continuous_covariate_keys\": None,\n", + " },\n", + " \"field_registries\": {\n", + " \"X\": {\n", + " \"data_registry\": {\"attr_name\": \"X\", \"attr_key\": None},\n", + " \"state_registry\": {\n", + " \"n_obs\": self.n_obs,\n", + " \"n_vars\": self.n_vars,\n", + " \"column_names\": self.var_names,\n", + " },\n", + " \"summary_stats\": {\"n_vars\": self.n_vars, \"n_cells\": self.n_obs},\n", + " },\n", + " \"batch\": {\n", + " \"data_registry\": {\"attr_name\": \"obs\", \"attr_key\": \"_scvi_batch\"},\n", + " \"state_registry\": {\n", + " \"categorical_mapping\": self.batch_keys,\n", + " \"original_key\": self._batch_key,\n", + " },\n", + " \"summary_stats\": {\"n_batch\": self.n_batch},\n", + " },\n", + " \"labels\": {\n", + " \"data_registry\": {\"attr_name\": \"obs\", \"attr_key\": \"_scvi_labels\"},\n", + " \"state_registry\": {\n", + " \"categorical_mapping\": self.label_keys,\n", + " \"original_key\": self._label_key,\n", + " \"unlabeled_category\": \"unlabeled\",\n", + " },\n", + " \"summary_stats\": {\"n_labels\": self.n_labels},\n", + " },\n", + " \"size_factor\": {\"data_registry\": {}, \"state_registry\": {}, \"summary_stats\": {}},\n", + " \"extra_categorical_covs\": {\n", + " \"data_registry\": {},\n", + " \"state_registry\": {},\n", + " \"summary_stats\": {\"n_extra_categorical_covs\": 0},\n", + " },\n", + " \"extra_continuous_covs\": {\n", + " \"data_registry\": {},\n", + " \"state_registry\": {},\n", + " \"summary_stats\": {\"n_extra_continuous_covs\": 0},\n", + " },\n", + " },\n", + " \"setup_method_name\": \"setup_anndata\",\n", +>>>>>>> 1a4c796846258a0f21cb9aa1975df079a79d3509 + " }\n", + "\n", + " @property\n", + " def batch_keys(self) -> int:\n", + " if self._batch_key is None:\n", + " return None\n", + " return self._dataset.encoders[self._batch_key]\n", + "\n", + " @property\n", + " def label_keys(self) -> int:\n", + " if self._label_key is None:\n", + " return None\n", + " return self._dataset.encoders[self._label_key]\n", + "\n", + " def on_before_batch_transfer(\n", + " self,\n", + " batch,\n", + " dataloader_idx,\n", + " ):\n", + " X_KEY: str = \"X\"\n", + " BATCH_KEY: str = \"batch\"\n", + " LABEL_KEY: str = \"labels\"\n", + "\n", + " return {\n", + " X_KEY: batch[\"X\"].float(),\n", + " BATCH_KEY: batch[self._batch_key][:, None] if self._batch_key is not None else None,\n", + " LABEL_KEY: batch[self._label_key][:, None] if self._label_key is not None else None,\n", + " }\n", + "\n", + " class _InferenceDataloader:\n", + " \"\"\"Wrapper to apply `on_before_batch_transfer` during iteration.\"\"\"\n", + "\n", + " def __init__(self, dataloader, transform_fn):\n", + " self.dataloader = dataloader\n", + " self.transform_fn = transform_fn\n", + "\n", + " def __iter__(self):\n", + " for i, batch in enumerate(self.dataloader):\n", + " yield self.transform_fn(batch, dataloader_idx=None)\n", + "\n", + " def __len__(self):\n", + " return len(self.dataloader)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "collection = ln.Collection.get(name=\"covid_normal_lung\")\n", + "# adata = collection.load(join='inner')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n", + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n" + ] + } + ], + "source": [ + "datamodule = MappedCollectionDataModule(\n", + " collection,\n", +<<<<<<< HEAD + " batch_key = \"assay\",\n", + " label_key = \"cell_type\",\n", + " batch_size = 128,\n", + " join = \"inner\",\n", + " parallel = True,\n", +======= + " batch_key=\"assay\",\n", + " label_key=\"cell_type\",\n", + " batch_size=128,\n", + " join=\"inner\",\n", + " parallel=True,\n", +>>>>>>> 1a4c796846258a0f21cb9aa1975df079a79d3509 + ")\n", + "model = scvi.model.SCVI(adata=None, datamodule=datamodule, registry=datamodule.registry)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "/home/canergen/.local/share/hatch/env/virtual/scvi-tools/SQ6yRUPo/scvi-tools/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c5379a25ae8740e084f606c386a63c4c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: 0%| | 0/1 [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/canergen/Documents/scvi-tools/src/scvi/module/_vae.py:569: UserWarning: The value argument must be within the support of the distribution\n", + " reconst_loss = -generative_outputs[MODULE_KEYS.PX_KEY].log_prob(x).sum(-1)\n", + "/home/canergen/Documents/scvi-tools/src/scvi/module/_vae.py:569: UserWarning: The value argument must be within the support of the distribution\n", + " reconst_loss = -generative_outputs[MODULE_KEYS.PX_KEY].log_prob(x).sum(-1)\n", + "`Trainer.fit` stopped: `max_epochs=1` reached.\n" + ] + } + ], + "source": [ + "dataloader = datamodule.train_dataloader()\n", + "model.train(max_epochs=1, datamodule=datamodule)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/canergen/.local/share/hatch/env/virtual/scvi-tools/SQ6yRUPo/scvi-tools/lib/python3.12/site-packages/docrep/decorators.py:43: SyntaxWarning: 'param_datamodule' is not a valid key!\n", + " doc = func(self, args[0].__doc__, *args[1:], **kwargs)\n", + "/home/canergen/.local/share/hatch/env/virtual/scvi-tools/SQ6yRUPo/scvi-tools/lib/python3.12/site-packages/docrep/decorators.py:43: SyntaxWarning: 'param_source_registry' is not a valid key!\n", + " doc = func(self, args[0].__doc__, *args[1:], **kwargs)\n" + ] + }, + { + "ename": "AttributeError", + "evalue": "'function' object has no attribute 'map'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[21], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mscvi\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSCANVI\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_scvi_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43madata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43munlabeled_category\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43munlabeled\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/model/_scanvi.py:268\u001b[0m, in \u001b[0;36mSCANVI.from_scvi_model\u001b[0;34m(cls, scvi_model, unlabeled_category, labels_key, adata, registry, **scanvi_kwargs)\u001b[0m\n\u001b[1;32m 260\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m adata:\n\u001b[1;32m 261\u001b[0m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39msetup_anndata(\n\u001b[1;32m 262\u001b[0m adata,\n\u001b[1;32m 263\u001b[0m unlabeled_category\u001b[38;5;241m=\u001b[39munlabeled_category,\n\u001b[1;32m 264\u001b[0m use_minified\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 265\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mscvi_setup_args,\n\u001b[1;32m 266\u001b[0m )\n\u001b[0;32m--> 268\u001b[0m scanvi_model \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43madata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mscvi_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mregistry\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mnon_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mscanvi_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 269\u001b[0m scvi_state_dict \u001b[38;5;241m=\u001b[39m scvi_model\u001b[38;5;241m.\u001b[39mmodule\u001b[38;5;241m.\u001b[39mstate_dict()\n\u001b[1;32m 270\u001b[0m scanvi_model\u001b[38;5;241m.\u001b[39mmodule\u001b[38;5;241m.\u001b[39mload_state_dict(scvi_state_dict, strict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n", + "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/model/_scanvi.py:131\u001b[0m, in \u001b[0;36mSCANVI.__init__\u001b[0;34m(self, adata, registry, n_hidden, n_latent, n_layers, dropout_rate, dispersion, gene_likelihood, linear_classifier, datamodule, **model_kwargs)\u001b[0m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(adata, registry)\n\u001b[1;32m 129\u001b[0m scanvae_model_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(model_kwargs)\n\u001b[0;32m--> 131\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_set_indices_and_labels\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdatamodule\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 133\u001b[0m \u001b[38;5;66;03m# ignores unlabeled catgegory\u001b[39;00m\n\u001b[1;32m 134\u001b[0m n_labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msummary_stats\u001b[38;5;241m.\u001b[39mn_labels \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m\n", + "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/model/_scanvi.py:289\u001b[0m, in \u001b[0;36mSCANVI._set_indices_and_labels\u001b[0;34m(self, datamodule)\u001b[0m\n\u001b[1;32m 282\u001b[0m labels \u001b[38;5;241m=\u001b[39m get_anndata_attribute(\n\u001b[1;32m 283\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39madata,\n\u001b[1;32m 284\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39madata_manager\u001b[38;5;241m.\u001b[39mdata_registry\u001b[38;5;241m.\u001b[39mlabels\u001b[38;5;241m.\u001b[39mattr_name,\n\u001b[1;32m 285\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moriginal_label_key,\n\u001b[1;32m 286\u001b[0m )\u001b[38;5;241m.\u001b[39mravel()\n\u001b[1;32m 287\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 288\u001b[0m \u001b[38;5;66;03m# for CZI:\u001b[39;00m\n\u001b[0;32m--> 289\u001b[0m labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(\u001b[43mdatamodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference_dataloader\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap\u001b[49m(\u001b[38;5;28;01mlambda\u001b[39;00m x: x[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlabel\u001b[39m\u001b[38;5;124m\"\u001b[39m]))\n\u001b[1;32m 290\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_label_mapping \u001b[38;5;241m=\u001b[39m labels_state_registry\u001b[38;5;241m.\u001b[39mcategorical_mapping\n\u001b[1;32m 292\u001b[0m \u001b[38;5;66;03m# set unlabeled and labeled indices\u001b[39;00m\n", + "\u001b[0;31mAttributeError\u001b[0m: 'function' object has no attribute 'map'" + ] + } + ], + "source": [ + "scvi.model.SCANVI.from_scvi_model(model, adata=False, unlabeled_category=\"unlabeled\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34mINFO \u001b[0m File lamin_model/model.pt already downloaded \n", + "\u001b[34mINFO \u001b[0m File lamin_model/model.pt already downloaded \n" + ] + }, + { + "data": { + "text/html": [ + "
SCVI model with the following parameters: \n", + "n_hidden: 128, n_latent: 10, n_layers: 1, dropout_rate: 0.1, dispersion: gene, gene_likelihood: zinb, \n", + "latent_distribution: normal.\n", + "Training status: Not Trained\n", + "Model's adata is minified?: False\n", + "\n" + ], + "text/plain": [ + "SCVI model with the following parameters: \n", + "n_hidden: \u001b[1;36m128\u001b[0m, n_latent: \u001b[1;36m10\u001b[0m, n_layers: \u001b[1;36m1\u001b[0m, dropout_rate: \u001b[1;36m0.1\u001b[0m, dispersion: gene, gene_likelihood: zinb, \n", + "latent_distribution: normal.\n", + "Training status: Not Trained\n", + "Model's adata is minified?: \u001b[3;91mFalse\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.save(\"lamin_model\", save_anndata=False, overwrite=True)\n", + "model.load(\"lamin_model\", adata=False)\n", + "model.load_query_data(adata=False, reference_model=\"lamin_model\", registry=datamodule.registry)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "adata.obs[\"batch\"] = adata.obs[\"assay\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34mINFO \u001b[0m File lamin_model/model.pt already downloaded \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/canergen/Documents/scvi-tools/src/scvi/data/fields/_base_field.py:63: UserWarning: adata.X does not contain unnormalized count data. Are you sure this is what you want?\n", + " self.validate_field(adata)\n" + ] + } + ], + "source": [ + "model2 = model.load_query_data(adata=adata, reference_model=\"lamin_model\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", + "/home/canergen/.local/share/hatch/env/virtual/scvi-tools/SQ6yRUPo/scvi-tools/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f5fc7b7faf684ca4a57c958e0564d3ec", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: 0%| | 0/1 [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/canergen/Documents/scvi-tools/src/scvi/module/_vae.py:569: UserWarning: The value argument must be within the support of the distribution\n", + " reconst_loss = -generative_outputs[MODULE_KEYS.PX_KEY].log_prob(x).sum(-1)\n", + "/home/canergen/Documents/scvi-tools/src/scvi/module/_vae.py:569: UserWarning: The value argument must be within the support of the distribution\n", + " reconst_loss = -generative_outputs[MODULE_KEYS.PX_KEY].log_prob(x).sum(-1)\n", + "`Trainer.fit` stopped: `max_epochs=1` reached.\n" + ] + } + ], + "source": [ + "model2.train(max_epochs=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34mINFO \u001b[0m File lamin_model/model.pt already downloaded \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TTTTTTTT setup_anndata\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/canergen/Documents/scvi-tools/src/scvi/data/fields/_base_field.py:63: UserWarning: adata.X does not contain unnormalized count data. Are you sure this is what you want?\n", + " self.validate_field(adata)\n" + ] + } + ], + "source": [ + "model3 = model.load(\"lamin_model\", adata=ad)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", + "/home/canergen/.local/share/hatch/env/virtual/scvi-tools/SQ6yRUPo/scvi-tools/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "97b3d8aa95994b059faead86ce8f4872", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: 0%| | 0/1 [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/canergen/Documents/scvi-tools/src/scvi/module/_vae.py:569: UserWarning: The value argument must be within the support of the distribution\n", + " reconst_loss = -generative_outputs[MODULE_KEYS.PX_KEY].log_prob(x).sum(-1)\n", + "/home/canergen/Documents/scvi-tools/src/scvi/module/_vae.py:569: UserWarning: The value argument must be within the support of the distribution\n", + " reconst_loss = -generative_outputs[MODULE_KEYS.PX_KEY].log_prob(x).sum(-1)\n", + "`Trainer.fit` stopped: `max_epochs=1` reached.\n" + ] + } + ], + "source": [ + "model3.train(max_epochs=1)\n", + "model3.save(\"lamin_model_anndata\", save_anndata=False, overwrite=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34mINFO \u001b[0m File lamin_model_anndata/model.pt already downloaded \n", + "\u001b[34mINFO \u001b[0m File lamin_model_anndata/model.pt already downloaded \n" + ] + }, + { + "data": { + "text/html": [ + "
SCVI model with the following parameters: \n", + "n_hidden: 128, n_latent: 10, n_layers: 1, dropout_rate: 0.1, dispersion: gene, gene_likelihood: zinb, \n", + "latent_distribution: normal.\n", + "Training status: Not Trained\n", + "Model's adata is minified?: False\n", + "\n" + ], + "text/plain": [ + "SCVI model with the following parameters: \n", + "n_hidden: \u001b[1;36m128\u001b[0m, n_latent: \u001b[1;36m10\u001b[0m, n_layers: \u001b[1;36m1\u001b[0m, dropout_rate: \u001b[1;36m0.1\u001b[0m, dispersion: gene, gene_likelihood: zinb, \n", + "latent_distribution: normal.\n", + "Training status: Not Trained\n", + "Model's adata is minified?: \u001b[3;91mFalse\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "dataloader = datamodule.inference_dataloader()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ +<<<<<<< HEAD + "#_ = model.get_elbo(dataloader=dataloader)\n", + "#_ = model.get_marginal_ll(dataloader=dataloader)\n", + "#_ = model.get_reconstruction_error(dataloader=dataloader)\n", +======= + "# _ = model.get_elbo(dataloader=dataloader)\n", + "# _ = model.get_marginal_ll(dataloader=dataloader)\n", + "# _ = model.get_reconstruction_error(dataloader=dataloader)\n", +>>>>>>> 1a4c796846258a0f21cb9aa1975df079a79d3509 + "_ = model.get_latent_representation(dataloader=dataloader)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34mINFO \u001b[0m File lamin_model/model.pt already downloaded \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/canergen/Documents/scvi-tools/src/scvi/model/base/_base_model.py:796: UserWarning: `var_names` for the loaded `model` does not match those used to train the model. For valid results, the former should match the latter.\n", + " _validate_var_names(adata, var_names)\n" + ] + }, + { + "ename": "ValueError", + "evalue": "Please set up your AnnData with SCVI.setup_anndata first.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[23], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlamin_model\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43madata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mad\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/model/base/_base_model.py:800\u001b[0m, in \u001b[0;36mBaseModelClass.load\u001b[0;34m(cls, dir_path, adata, accelerator, device, prefix, backup_url)\u001b[0m\n\u001b[1;32m 797\u001b[0m method_name \u001b[38;5;241m=\u001b[39m registry\u001b[38;5;241m.\u001b[39mget(_SETUP_METHOD_NAME, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msetup_anndata\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 798\u001b[0m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mcls\u001b[39m, method_name)(adata, source_registry\u001b[38;5;241m=\u001b[39mregistry, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mregistry[_SETUP_ARGS_KEY])\n\u001b[0;32m--> 800\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43m_initialize_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43madata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mregistry\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mattr_dict\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 801\u001b[0m model\u001b[38;5;241m.\u001b[39mmodule\u001b[38;5;241m.\u001b[39mon_load(model)\n\u001b[1;32m 802\u001b[0m model\u001b[38;5;241m.\u001b[39mmodule\u001b[38;5;241m.\u001b[39mload_state_dict(model_state_dict)\n", + "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/model/base/_save_load.py:140\u001b[0m, in \u001b[0;36m_initialize_model\u001b[0;34m(cls, adata, registry, attr_dict)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m adata:\n\u001b[1;32m 138\u001b[0m adata \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 140\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43madata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mregistry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mregistry\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mnon_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 141\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m attr, val \u001b[38;5;129;01min\u001b[39;00m attr_dict\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m 142\u001b[0m \u001b[38;5;28msetattr\u001b[39m(model, attr, val)\n", + "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/model/_scvi.py:130\u001b[0m, in \u001b[0;36mSCVI.__init__\u001b[0;34m(self, adata, registry, n_hidden, n_latent, n_layers, dropout_rate, dispersion, gene_likelihood, latent_distribution, datamodule, **kwargs)\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 117\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 118\u001b[0m adata: AnnData \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 129\u001b[0m ):\n\u001b[0;32m--> 130\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43madata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mregistry\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_module_kwargs \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 133\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mn_hidden\u001b[39m\u001b[38;5;124m\"\u001b[39m: n_hidden,\n\u001b[1;32m 134\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mn_latent\u001b[39m\u001b[38;5;124m\"\u001b[39m: n_latent,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 141\u001b[0m }\n\u001b[1;32m 142\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_model_summary_string \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 143\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSCVI model with the following parameters: \u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 144\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mn_hidden: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mn_hidden\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, n_latent: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mn_latent\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, n_layers: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mn_layers\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdropout_rate: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdropout_rate\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, dispersion: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdispersion\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 146\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgene_likelihood: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mgene_likelihood\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, latent_distribution: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mlatent_distribution\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 147\u001b[0m )\n", + "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/model/base/_base_model.py:125\u001b[0m, in \u001b[0;36mBaseModelClass.__init__\u001b[0;34m(self, adata, registry)\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m adata \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_adata \u001b[38;5;241m=\u001b[39m adata\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_adata_manager \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_most_recent_anndata_manager\u001b[49m\u001b[43m(\u001b[49m\u001b[43madata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrequired\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 126\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_register_manager_for_instance(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39madata_manager)\n\u001b[1;32m 127\u001b[0m \u001b[38;5;66;03m# Suffix registry instance variable with _ to include it when saving the model.\u001b[39;00m\n", + "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/model/base/_base_model.py:360\u001b[0m, in \u001b[0;36mBaseModelClass._get_most_recent_anndata_manager\u001b[0;34m(cls, adata, required)\u001b[0m\n\u001b[1;32m 358\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _SCVI_UUID_KEY \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m adata\u001b[38;5;241m.\u001b[39muns:\n\u001b[1;32m 359\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m required:\n\u001b[0;32m--> 360\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 361\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease set up your AnnData with \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.setup_anndata first.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 362\u001b[0m )\n\u001b[1;32m 363\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 365\u001b[0m adata_id \u001b[38;5;241m=\u001b[39m adata\u001b[38;5;241m.\u001b[39muns[_SCVI_UUID_KEY]\n", + "\u001b[0;31mValueError\u001b[0m: Please set up your AnnData with SCVI.setup_anndata first." + ] + } + ], + "source": [ + "model.load(\"lamin_model\", adata=ad)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ +<<<<<<< HEAD + "model.registry['field_registries']['X']['state_registry']['column_names']" +======= + "model.registry[\"field_registries\"][\"X\"][\"state_registry\"][\"column_names\"]" +>>>>>>> 1a4c796846258a0f21cb9aa1975df079a79d3509 + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ENSG00000237491',\n", + " 'ENSG00000225880',\n", + " 'ENSG00000230368',\n", + " 'ENSG00000187634',\n", + " 'ENSG00000188976',\n", + " 'ENSG00000187961',\n", + " 'ENSG00000188290',\n", + " 'ENSG00000187608',\n", + " 'ENSG00000188157',\n", + " 'ENSG00000078808',\n", + " 'ENSG00000176022',\n", + " 'ENSG00000160087',\n", + " 'ENSG00000162572',\n", + " 'ENSG00000131584',\n", + " 'ENSG00000169972',\n", + " 'ENSG00000127054',\n", + " 'ENSG00000224051',\n", + " 'ENSG00000107404',\n", + " 'ENSG00000162576',\n", + " 'ENSG00000175756',\n", + " 'ENSG00000221978',\n", + " 'ENSG00000242485',\n", + " 'ENSG00000272455',\n", + " 'ENSG00000235098',\n", + " 'ENSG00000179403',\n", + " 'ENSG00000160072',\n", + " 'ENSG00000197785',\n", + " 'ENSG00000160075',\n", + " 'ENSG00000215014',\n", + " 'ENSG00000228594',\n", + " 'ENSG00000197530',\n", + " 'ENSG00000189409',\n", + " 'ENSG00000248333',\n", + " 'ENSG00000189339',\n", + " 'ENSG00000008128',\n", + " 'ENSG00000215790',\n", + " 'ENSG00000008130',\n", + " 'ENSG00000078369',\n", + " 'ENSG00000178821',\n", + " 'ENSG00000067606',\n", + " 'ENSG00000162585',\n", + " 'ENSG00000157933',\n", + " 'ENSG00000116151',\n", + " 'ENSG00000157916',\n", + " 'ENSG00000157911',\n", + " 'ENSG00000272449',\n", + " 'ENSG00000162591',\n", + " 'ENSG00000158109',\n", + " 'ENSG00000116213',\n", + " 'ENSG00000078900',\n", + " 'ENSG00000235169',\n", + " 'ENSG00000130764',\n", + " 'ENSG00000116198',\n", + " 'ENSG00000169598',\n", + " 'ENSG00000131697',\n", + " 'ENSG00000069424',\n", + " 'ENSG00000116251',\n", + " 'ENSG00000116237',\n", + " 'ENSG00000158292',\n", + " 'ENSG00000097021',\n", + " 'ENSG00000187017',\n", + " 'ENSG00000215788',\n", + " 'ENSG00000171680',\n", + " 'ENSG00000162408',\n", + " 'ENSG00000204859',\n", + " 'ENSG00000162413',\n", + " 'ENSG00000116273',\n", + " 'ENSG00000041988',\n", + " 'ENSG00000007923',\n", + " 'ENSG00000171735',\n", + " 'ENSG00000049245',\n", + " 'ENSG00000049246',\n", + " 'ENSG00000116288',\n", + " 'ENSG00000116285',\n", + " 'ENSG00000142599',\n", + " 'ENSG00000074800',\n", + " 'ENSG00000180758',\n", + " 'ENSG00000049239',\n", + " 'ENSG00000171621',\n", + " 'ENSG00000171612',\n", + " 'ENSG00000188807',\n", + " 'ENSG00000171608',\n", + " 'ENSG00000171603',\n", + " 'ENSG00000178585',\n", + " 'ENSG00000162441',\n", + " 'ENSG00000173614',\n", + " 'ENSG00000130939',\n", + " 'ENSG00000054523',\n", + " 'ENSG00000142657',\n", + " 'ENSG00000251503',\n", + " 'ENSG00000160049',\n", + " 'ENSG00000142655',\n", + " 'ENSG00000130940',\n", + " 'ENSG00000120948',\n", + " 'ENSG00000116649',\n", + " 'ENSG00000171824',\n", + " 'ENSG00000198793',\n", + " 'ENSG00000120942',\n", + " 'ENSG00000204624',\n", + " 'ENSG00000116661',\n", + " 'ENSG00000132879',\n", + " 'ENSG00000116663',\n", + " 'ENSG00000116670',\n", + " 'ENSG00000177674',\n", + " 'ENSG00000177000',\n", + " 'ENSG00000011021',\n", + " 'ENSG00000116685',\n", + " 'ENSG00000083444',\n", + " 'ENSG00000116688',\n", + " 'ENSG00000116691',\n", + " 'ENSG00000028137',\n", + " 'ENSG00000048707',\n", + " 'ENSG00000162496',\n", + " 'ENSG00000162493',\n", + " 'ENSG00000116731',\n", + " 'ENSG00000189337',\n", + " 'ENSG00000171729',\n", + " 'ENSG00000142634',\n", + " 'ENSG00000132906',\n", + " 'ENSG00000116138',\n", + " 'ENSG00000197312',\n", + " 'ENSG00000116786',\n", + " 'ENSG00000162461',\n", + " 'ENSG00000162458',\n", + " 'ENSG00000065526',\n", + " 'ENSG00000116809',\n", + " 'ENSG00000142627',\n", + " 'ENSG00000142632',\n", + " 'ENSG00000132881',\n", + " 'ENSG00000037637',\n", + " 'ENSG00000055070',\n", + " 'ENSG00000157191',\n", + " 'ENSG00000261135',\n", + " 'ENSG00000224174',\n", + " 'ENSG00000219481',\n", + " 'ENSG00000058453',\n", + " 'ENSG00000238142',\n", + " 'ENSG00000272426',\n", + " 'ENSG00000117122',\n", + " 'ENSG00000159363',\n", + " 'ENSG00000117118',\n", + " 'ENSG00000169991',\n", + " 'ENSG00000127481',\n", + " 'ENSG00000127463',\n", + " 'ENSG00000053372',\n", + " 'ENSG00000053371',\n", + " 'ENSG00000040487',\n", + " 'ENSG00000077549',\n", + " 'ENSG00000158747',\n", + " 'ENSG00000162542',\n", + " 'ENSG00000169914',\n", + " 'ENSG00000162543',\n", + " 'ENSG00000162545',\n", + " 'ENSG00000090432',\n", + " 'ENSG00000158828',\n", + " 'ENSG00000244038',\n", + " 'ENSG00000127483',\n", + " 'ENSG00000075151',\n", + " 'ENSG00000117298',\n", + " 'ENSG00000142794',\n", + " 'ENSG00000090686',\n", + " 'ENSG00000142798',\n", + " 'ENSG00000070831',\n", + " 'ENSG00000184677',\n", + " 'ENSG00000173372',\n", + " 'ENSG00000159189',\n", + " 'ENSG00000173369',\n", + " 'ENSG00000133216',\n", + " 'ENSG00000004487',\n", + " 'ENSG00000169641',\n", + " 'ENSG00000142676',\n", + " 'ENSG00000011007',\n", + " 'ENSG00000057757',\n", + " 'ENSG00000011009',\n", + " 'ENSG00000117308',\n", + " 'ENSG00000117305',\n", + " 'ENSG00000179163',\n", + " 'ENSG00000189266',\n", + " 'ENSG00000188529',\n", + " 'ENSG00000185436',\n", + " 'ENSG00000001460',\n", + " 'ENSG00000001461',\n", + " 'ENSG00000117602',\n", + " 'ENSG00000133226',\n", + " 'ENSG00000169504',\n", + " 'ENSG00000117614',\n", + " 'ENSG00000117616',\n", + " 'ENSG00000183726',\n", + " 'ENSG00000188672',\n", + " 'ENSG00000204178',\n", + " 'ENSG00000157978',\n", + " 'ENSG00000117643',\n", + " 'ENSG00000162430',\n", + " 'ENSG00000117640',\n", + " 'ENSG00000127423',\n", + " 'ENSG00000182749',\n", + " 'ENSG00000117632',\n", + " 'ENSG00000158006',\n", + " 'ENSG00000158008',\n", + " 'ENSG00000175087',\n", + " 'ENSG00000142684',\n", + " 'ENSG00000142675',\n", + " 'ENSG00000130695',\n", + " 'ENSG00000142669',\n", + " 'ENSG00000158062',\n", + " 'ENSG00000117682',\n", + " 'ENSG00000198830',\n", + " 'ENSG00000117713',\n", + " 'ENSG00000060642',\n", + " 'ENSG00000204160',\n", + " 'ENSG00000142751',\n", + " 'ENSG00000198746',\n", + " 'ENSG00000090273',\n", + " 'ENSG00000158246',\n", + " 'ENSG00000090020',\n", + " 'ENSG00000142784',\n", + " 'ENSG00000186501',\n", + " 'ENSG00000142765',\n", + " 'ENSG00000142733',\n", + " 'ENSG00000158195',\n", + " 'ENSG00000126705',\n", + " 'ENSG00000126709',\n", + " 'ENSG00000009780',\n", + " 'ENSG00000117758',\n", + " 'ENSG00000117751',\n", + " 'ENSG00000130775',\n", + " 'ENSG00000117748',\n", + " 'ENSG00000130768',\n", + " 'ENSG00000158156',\n", + " 'ENSG00000158161',\n", + " 'ENSG00000126698',\n", + " 'ENSG00000204138',\n", + " 'ENSG00000180198',\n", + " 'ENSG00000180098',\n", + " 'ENSG00000120656',\n", + " 'ENSG00000188060',\n", + " 'ENSG00000162419',\n", + " 'ENSG00000198492',\n", + " 'ENSG00000159023',\n", + " 'ENSG00000253304',\n", + " 'ENSG00000116350',\n", + " 'ENSG00000116353',\n", + " 'ENSG00000060656',\n", + " 'ENSG00000162511',\n", + " 'ENSG00000162512',\n", + " 'ENSG00000134644',\n", + " 'ENSG00000060688',\n", + " 'ENSG00000121766',\n", + " 'ENSG00000142910',\n", + " 'ENSG00000162517',\n", + " 'ENSG00000084636',\n", + " 'ENSG00000121753',\n", + " 'ENSG00000184007',\n", + " 'ENSG00000121774',\n", + " 'ENSG00000121775',\n", + " 'ENSG00000025800',\n", + " 'ENSG00000084652',\n", + " 'ENSG00000160050',\n", + " 'ENSG00000160051',\n", + " 'ENSG00000160055',\n", + " 'ENSG00000084623',\n", + " 'ENSG00000116478',\n", + " 'ENSG00000175130',\n", + " 'ENSG00000225828',\n", + " 'ENSG00000160058',\n", + " 'ENSG00000160062',\n", + " 'ENSG00000176261',\n", + " 'ENSG00000162521',\n", + " 'ENSG00000162520',\n", + " 'ENSG00000162522',\n", + " 'ENSG00000134684',\n", + " 'ENSG00000116497',\n", + " 'ENSG00000116514',\n", + " 'ENSG00000004455',\n", + " 'ENSG00000142920',\n", + " 'ENSG00000116525',\n", + " 'ENSG00000160094',\n", + " 'ENSG00000134686',\n", + " 'ENSG00000121903',\n", + " 'ENSG00000163866',\n", + " 'ENSG00000187513',\n", + " 'ENSG00000116544',\n", + " 'ENSG00000271741',\n", + " 'ENSG00000163867',\n", + " 'ENSG00000197056',\n", + " 'ENSG00000116560',\n", + " 'ENSG00000146463',\n", + " 'ENSG00000142687',\n", + " 'ENSG00000020129',\n", + " 'ENSG00000126067',\n", + " 'ENSG00000092853',\n", + " 'ENSG00000134698',\n", + " 'ENSG00000092847',\n", + " 'ENSG00000126070',\n", + " 'ENSG00000092850',\n", + " 'ENSG00000116863',\n", + " 'ENSG00000054116',\n", + " 'ENSG00000116871',\n", + " 'ENSG00000054118',\n", + " 'ENSG00000214193',\n", + " 'ENSG00000142694',\n", + " 'ENSG00000196182',\n", + " 'ENSG00000181817',\n", + " 'ENSG00000116885',\n", + " 'ENSG00000116898',\n", + " 'ENSG00000163874',\n", + " 'ENSG00000163875',\n", + " 'ENSG00000163877',\n", + " 'ENSG00000163879',\n", + " 'ENSG00000134697',\n", + " 'ENSG00000134690',\n", + " 'ENSG00000196449',\n", + " 'ENSG00000188786',\n", + " 'ENSG00000204084',\n", + " 'ENSG00000183431',\n", + " 'ENSG00000183386',\n", + " 'ENSG00000183520',\n", + " 'ENSG00000116954',\n", + " 'ENSG00000214114',\n", + " 'ENSG00000174574',\n", + " 'ENSG00000168653',\n", + " 'ENSG00000127603',\n", + " 'ENSG00000090621',\n", + " 'ENSG00000163909',\n", + " 'ENSG00000084072',\n", + " 'ENSG00000043514',\n", + " 'ENSG00000116990',\n", + " 'ENSG00000168389',\n", + " 'ENSG00000131236',\n", + " 'ENSG00000131238',\n", + " 'ENSG00000117000',\n", + " 'ENSG00000084073',\n", + " 'ENSG00000049089',\n", + " 'ENSG00000084070',\n", + " 'ENSG00000187801',\n", + " 'ENSG00000187815',\n", + " 'ENSG00000164002',\n", + " 'ENSG00000117010',\n", + " 'ENSG00000117016',\n", + " 'ENSG00000066136',\n", + " 'ENSG00000117013',\n", + " 'ENSG00000179862',\n", + " 'ENSG00000171793',\n", + " 'ENSG00000010803',\n", + " 'ENSG00000127124',\n", + " 'ENSG00000198815',\n", + " 'ENSG00000066185',\n", + " 'ENSG00000127125',\n", + " 'ENSG00000186409',\n", + " 'ENSG00000171960',\n", + " 'ENSG00000065978',\n", + " 'ENSG00000117385',\n", + " 'ENSG00000177868',\n", + " 'ENSG00000164010',\n", + " 'ENSG00000164011',\n", + " 'ENSG00000117394',\n", + " 'ENSG00000227533',\n", + " 'ENSG00000117395',\n", + " 'ENSG00000066056',\n", + " 'ENSG00000117399',\n", + " 'ENSG00000066322',\n", + " 'ENSG00000159479',\n", + " 'ENSG00000198198',\n", + " 'ENSG00000178922',\n", + " 'ENSG00000142949',\n", + " 'ENSG00000126091',\n", + " 'ENSG00000117408',\n", + " 'ENSG00000132768',\n", + " 'ENSG00000117410',\n", + " 'ENSG00000117411',\n", + " 'ENSG00000159214',\n", + " 'ENSG00000196517',\n", + " 'ENSG00000230615',\n", + " 'ENSG00000178028',\n", + " 'ENSG00000117419',\n", + " 'ENSG00000187147',\n", + " 'ENSG00000126106',\n", + " 'ENSG00000142945',\n", + " 'ENSG00000142937',\n", + " 'ENSG00000142959',\n", + " 'ENSG00000173846',\n", + " 'ENSG00000222009',\n", + " 'ENSG00000117425',\n", + " 'ENSG00000070785',\n", + " 'ENSG00000126107',\n", + " 'ENSG00000126088',\n", + " 'ENSG00000162415',\n", + " 'ENSG00000186603',\n", + " 'ENSG00000132781',\n", + " 'ENSG00000132773',\n", + " 'ENSG00000070759',\n", + " 'ENSG00000132763',\n", + " 'ENSG00000117450',\n", + " 'ENSG00000117448',\n", + " 'ENSG00000132780',\n", + " 'ENSG00000159592',\n", + " 'ENSG00000159596',\n", + " 'ENSG00000197429',\n", + " 'ENSG00000086015',\n", + " 'ENSG00000117472',\n", + " 'ENSG00000085998',\n", + " 'ENSG00000171357',\n", + " 'ENSG00000085999',\n", + " 'ENSG00000132128',\n", + " 'ENSG00000173660',\n", + " 'ENSG00000117481',\n", + " 'ENSG00000117480',\n", + " 'ENSG00000079277',\n", + " 'ENSG00000123472',\n", + " 'ENSG00000159658',\n", + " 'ENSG00000123473',\n", + " 'ENSG00000162368',\n", + " 'ENSG00000132122',\n", + " 'ENSG00000185104',\n", + " 'ENSG00000123080',\n", + " 'ENSG00000123091',\n", + " 'ENSG00000085832',\n", + " 'ENSG00000117859',\n", + " 'ENSG00000078618',\n", + " 'ENSG00000169213',\n", + " 'ENSG00000117862',\n", + " 'ENSG00000134717',\n", + " 'ENSG00000157077',\n", + " 'ENSG00000154222',\n", + " 'ENSG00000085840',\n", + " 'ENSG00000134748',\n", + " 'ENSG00000134744',\n", + " 'ENSG00000116157',\n", + " 'ENSG00000182183',\n", + " 'ENSG00000162377',\n", + " 'ENSG00000162378',\n", + " 'ENSG00000121310',\n", + " 'ENSG00000116171',\n", + " 'ENSG00000157184',\n", + " 'ENSG00000162385',\n", + " 'ENSG00000157193',\n", + " 'ENSG00000058804',\n", + " 'ENSG00000058799',\n", + " 'ENSG00000081870',\n", + " 'ENSG00000116212',\n", + " 'ENSG00000116209',\n", + " 'ENSG00000116205',\n", + " 'ENSG00000116221',\n", + " 'ENSG00000157216',\n", + " 'ENSG00000162390',\n", + " 'ENSG00000243725',\n", + " 'ENSG00000162396',\n", + " 'ENSG00000006555',\n", + " 'ENSG00000116133',\n", + " 'ENSG00000162402',\n", + " 'ENSG00000162407',\n", + " 'ENSG00000184292',\n", + " 'ENSG00000162601',\n", + " 'ENSG00000177606',\n", + " 'ENSG00000172456',\n", + " 'ENSG00000134709',\n", + " 'ENSG00000162599',\n", + " 'ENSG00000162604',\n", + " 'ENSG00000132849',\n", + " 'ENSG00000240563',\n", + " 'ENSG00000132854',\n", + " 'ENSG00000162607',\n", + " 'ENSG00000116641',\n", + " 'ENSG00000125703',\n", + " 'ENSG00000088035',\n", + " 'ENSG00000142856',\n", + " 'ENSG00000203965',\n", + " 'ENSG00000079739',\n", + " 'ENSG00000185483',\n", + " 'ENSG00000158966',\n", + " 'ENSG00000162437',\n", + " 'ENSG00000162434',\n", + " 'ENSG00000162433',\n", + " 'ENSG00000116675',\n", + " 'ENSG00000116678',\n", + " 'ENSG00000184588',\n", + " 'ENSG00000118473',\n", + " 'ENSG00000152760',\n", + " 'ENSG00000152763',\n", + " 'ENSG00000198160',\n", + " 'ENSG00000116704',\n", + " 'ENSG00000142864',\n", + " 'ENSG00000116717',\n", + " 'ENSG00000172380',\n", + " 'ENSG00000116729',\n", + " 'ENSG00000024526',\n", + " 'ENSG00000033122',\n", + " 'ENSG00000066557',\n", + " 'ENSG00000116754',\n", + " 'ENSG00000118454',\n", + " 'ENSG00000197568',\n", + " 'ENSG00000116761',\n", + " 'ENSG00000132485',\n", + " 'ENSG00000172260',\n", + " 'ENSG00000162620',\n", + " 'ENSG00000116791',\n", + " 'ENSG00000162623',\n", + " 'ENSG00000137968',\n", + " 'ENSG00000117054',\n", + " 'ENSG00000137955',\n", + " 'ENSG00000184005',\n", + " 'ENSG00000117069',\n", + " 'ENSG00000142892',\n", + " 'ENSG00000154027',\n", + " 'ENSG00000036549',\n", + " 'ENSG00000077254',\n", + " 'ENSG00000162614',\n", + " 'ENSG00000162613',\n", + " 'ENSG00000162616',\n", + " 'ENSG00000137959',\n", + " 'ENSG00000137965',\n", + " 'ENSG00000117114',\n", + " 'ENSG00000137941',\n", + " 'ENSG00000271576',\n", + " 'ENSG00000142875',\n", + " 'ENSG00000203943',\n", + " 'ENSG00000117133',\n", + " 'ENSG00000174021',\n", + " 'ENSG00000117151',\n", + " 'ENSG00000117155',\n", + " 'ENSG00000097096',\n", + " 'ENSG00000142867',\n", + " 'ENSG00000153904',\n", + " 'ENSG00000142871',\n", + " 'ENSG00000117174',\n", + " 'ENSG00000171502',\n", + " 'ENSG00000122417',\n", + " 'ENSG00000097033',\n", + " 'ENSG00000183291',\n", + " 'ENSG00000153936',\n", + " 'ENSG00000143013',\n", + " 'ENSG00000065243',\n", + " 'ENSG00000137947',\n", + " 'ENSG00000137944',\n", + " 'ENSG00000117226',\n", + " 'ENSG00000117228',\n", + " 'ENSG00000162654',\n", + " 'ENSG00000197147',\n", + " 'ENSG00000272931',\n", + " 'ENSG00000171492',\n", + " 'ENSG00000162664',\n", + " 'ENSG00000122482',\n", + " 'ENSG00000162669',\n", + " 'ENSG00000097046',\n", + " 'ENSG00000069702',\n", + " 'ENSG00000189195',\n", + " 'ENSG00000174842',\n", + " 'ENSG00000122484',\n", + " 'ENSG00000067208',\n", + " 'ENSG00000122406',\n", + " 'ENSG00000154511',\n", + " 'ENSG00000143033',\n", + " 'ENSG00000117500',\n", + " 'ENSG00000122483',\n", + " 'ENSG00000117505',\n", + " 'ENSG00000137942',\n", + " 'ENSG00000137936',\n", + " 'ENSG00000067334',\n", + " 'ENSG00000023909',\n", + " 'ENSG00000137962',\n", + " 'ENSG00000117528',\n", + " 'ENSG00000117525',\n", + " 'ENSG00000117519',\n", + " 'ENSG00000172339',\n", + " 'ENSG00000152078',\n", + " 'ENSG00000117569',\n", + " 'ENSG00000188641',\n", + " 'ENSG00000162627',\n", + " 'ENSG00000117598',\n", + " 'ENSG00000117600',\n", + " 'ENSG00000099260',\n", + " 'ENSG00000156869',\n", + " 'ENSG00000162688',\n", + " 'ENSG00000117620',\n", + " 'ENSG00000156875',\n", + " 'ENSG00000156876',\n", + " 'ENSG00000122435',\n", + " 'ENSG00000137992',\n", + " 'ENSG00000137996',\n", + " 'ENSG00000079335',\n", + " 'ENSG00000162692',\n", + " 'ENSG00000162694',\n", + " 'ENSG00000162695',\n", + " 'ENSG00000117543',\n", + " 'ENSG00000170989',\n", + " 'ENSG00000185946',\n", + " 'ENSG00000240038',\n", + " 'ENSG00000198890',\n", + " 'ENSG00000162631',\n", + " 'ENSG00000134215',\n", + " 'ENSG00000162636',\n", + " 'ENSG00000162639',\n", + " 'ENSG00000134186',\n", + " 'ENSG00000116266',\n", + " 'ENSG00000121957',\n", + " 'ENSG00000085433',\n", + " 'ENSG00000197780',\n", + " 'ENSG00000215717',\n", + " 'ENSG00000116299',\n", + " 'ENSG00000031698',\n", + " 'ENSG00000143126',\n", + " 'ENSG00000134222',\n", + " 'ENSG00000134243',\n", + " 'ENSG00000143106',\n", + " 'ENSG00000162650',\n", + " 'ENSG00000174151',\n", + " 'ENSG00000065135',\n", + " 'ENSG00000116337',\n", + " 'ENSG00000168765',\n", + " 'ENSG00000213366',\n", + " 'ENSG00000134201',\n", + " 'ENSG00000134202',\n", + " 'ENSG00000184371',\n", + " 'ENSG00000168710',\n", + " 'ENSG00000143093',\n", + " 'ENSG00000116396',\n", + " 'ENSG00000162775',\n", + " 'ENSG00000168679',\n", + " 'ENSG00000134248',\n", + " 'ENSG00000143119',\n", + " 'ENSG00000121931',\n", + " 'ENSG00000156171',\n", + " 'ENSG00000134255',\n", + " 'ENSG00000116455',\n", + " 'ENSG00000116459',\n", + " 'ENSG00000116473',\n", + " 'ENSG00000197852',\n", + " 'ENSG00000064703',\n", + " 'ENSG00000143079',\n", + " 'ENSG00000134245',\n", + " 'ENSG00000007341',\n", + " 'ENSG00000116489',\n", + " 'ENSG00000155363',\n", + " 'ENSG00000155366',\n", + " 'ENSG00000238198',\n", + " 'ENSG00000198799',\n", + " 'ENSG00000081026',\n", + " 'ENSG00000116793',\n", + " 'ENSG00000081019',\n", + " 'ENSG00000134262',\n", + " 'ENSG00000118655',\n", + " 'ENSG00000163349',\n", + " 'ENSG00000116774',\n", + " 'ENSG00000134207',\n", + " 'ENSG00000197323',\n", + " 'ENSG00000116752',\n", + " 'ENSG00000175984',\n", + " 'ENSG00000213281',\n", + " 'ENSG00000009307',\n", + " 'ENSG00000052723',\n", + " 'ENSG00000134198',\n", + " 'ENSG00000173218',\n", + " 'ENSG00000163393',\n", + " 'ENSG00000163399',\n", + " 'ENSG00000116815',\n", + " 'ENSG00000143061',\n", + " 'ENSG00000134247',\n", + " 'ENSG00000116830',\n", + " 'ENSG00000134253',\n", + " 'ENSG00000198162',\n", + " 'ENSG00000183508',\n", + " 'ENSG00000196505',\n", + " 'ENSG00000065183',\n", + " 'ENSG00000155761',\n", + " 'ENSG00000116874',\n", + " 'ENSG00000143067',\n", + " 'ENSG00000092621',\n", + " 'ENSG00000134250',\n", + " 'ENSG00000226067',\n", + " 'ENSG00000188610',\n", + " 'ENSG00000232527',\n", + " 'ENSG00000215784',\n", + " 'ENSG00000162825',\n", + " 'ENSG00000117262',\n", + " 'ENSG00000186141',\n", + " 'ENSG00000186364',\n", + " 'ENSG00000131788',\n", + " 'ENSG00000198483',\n", + " 'ENSG00000143127',\n", + " 'ENSG00000131779',\n", + " 'ENSG00000121851',\n", + " 'ENSG00000131791',\n", + " 'ENSG00000131781',\n", + " 'ENSG00000131778',\n", + " 'ENSG00000116128',\n", + " 'ENSG00000162836',\n", + " 'ENSG00000188092',\n", + " 'ENSG00000178104',\n", + " 'ENSG00000203814',\n", + " 'ENSG00000183598',\n", + " 'ENSG00000288825',\n", + " 'ENSG00000184678',\n", + " 'ENSG00000184260',\n", + " 'ENSG00000184270',\n", + " 'ENSG00000178096',\n", + " 'ENSG00000159164',\n", + " 'ENSG00000143368',\n", + " 'ENSG00000014914',\n", + " 'ENSG00000136631',\n", + " 'ENSG00000023902',\n", + " 'ENSG00000143401',\n", + " 'ENSG00000117362',\n", + " 'ENSG00000159208',\n", + " 'ENSG00000117360',\n", + " 'ENSG00000163125',\n", + " 'ENSG00000143374',\n", + " 'ENSG00000143369',\n", + " 'ENSG00000228126',\n", + " 'ENSG00000143382',\n", + " 'ENSG00000143384',\n", + " 'ENSG00000143420',\n", + " 'ENSG00000143457',\n", + " 'ENSG00000163131',\n", + " 'ENSG00000143387',\n", + " 'ENSG00000143437',\n", + " 'ENSG00000143379',\n", + " 'ENSG00000143418',\n", + " 'ENSG00000143409',\n", + " 'ENSG00000143363',\n", + " 'ENSG00000197622',\n", + " 'ENSG00000213190',\n", + " 'ENSG00000143458',\n", + " 'ENSG00000143434',\n", + " 'ENSG00000163154',\n", + " 'ENSG00000163156',\n", + " 'ENSG00000163155',\n", + " 'ENSG00000163159',\n", + " 'ENSG00000143398',\n", + " 'ENSG00000159352',\n", + " 'ENSG00000143373',\n", + " 'ENSG00000143393',\n", + " 'ENSG00000143390',\n", + " 'ENSG00000143416',\n", + " 'ENSG00000159377',\n", + " 'ENSG00000143442',\n", + " 'ENSG00000143375',\n", + " 'ENSG00000143367',\n", + " 'ENSG00000143376',\n", + " 'ENSG00000178796',\n", + " 'ENSG00000143436',\n", + " 'ENSG00000143450',\n", + " 'ENSG00000182134',\n", + " 'ENSG00000159445',\n", + " 'ENSG00000197747',\n", + " 'ENSG00000163191',\n", + " 'ENSG00000197956',\n", + " 'ENSG00000196154',\n", + " 'ENSG00000196754',\n", + " 'ENSG00000188643',\n", + " 'ENSG00000189334',\n", + " 'ENSG00000189171',\n", + " 'ENSG00000160678',\n", + " 'ENSG00000160679',\n", + " 'ENSG00000143553',\n", + " 'ENSG00000143621',\n", + " 'ENSG00000198837',\n", + " 'ENSG00000160741',\n", + " 'ENSG00000143570',\n", + " 'ENSG00000143578',\n", + " 'ENSG00000143543',\n", + " 'ENSG00000272654',\n", + " 'ENSG00000143545',\n", + " 'ENSG00000177954',\n", + " 'ENSG00000143549',\n", + " 'ENSG00000143569',\n", + " 'ENSG00000143575',\n", + " 'ENSG00000143515',\n", + " 'ENSG00000160712',\n", + " 'ENSG00000169291',\n", + " 'ENSG00000160714',\n", + " 'ENSG00000160710',\n", + " 'ENSG00000163344',\n", + " 'ENSG00000270361',\n", + " 'ENSG00000163346',\n", + " 'ENSG00000163348',\n", + " 'ENSG00000160691',\n", + " 'ENSG00000160688',\n", + " 'ENSG00000160685',\n", + " 'ENSG00000143537',\n", + " 'ENSG00000143590',\n", + " 'ENSG00000169242',\n", + " 'ENSG00000169241',\n", + " 'ENSG00000179085',\n", + " 'ENSG00000163463',\n", + " 'ENSG00000163462',\n", + " 'ENSG00000185499',\n", + " 'ENSG00000169231',\n", + " 'ENSG00000173171',\n", + " 'ENSG00000160752',\n", + " 'ENSG00000160753',\n", + " 'ENSG00000116539',\n", + " 'ENSG00000125459',\n", + " 'ENSG00000163374',\n", + " 'ENSG00000132676',\n", + " 'ENSG00000116580',\n", + " 'ENSG00000132718',\n", + " 'ENSG00000143622',\n", + " 'ENSG00000132680',\n", + " 'ENSG00000116584',\n", + " 'ENSG00000163479',\n", + " 'ENSG00000160803',\n", + " 'ENSG00000116586',\n", + " 'ENSG00000132698',\n", + " 'ENSG00000254726',\n", + " 'ENSG00000160789',\n", + " 'ENSG00000196189',\n", + " 'ENSG00000160785',\n", + " 'ENSG00000260238',\n", + " 'ENSG00000160781',\n", + " 'ENSG00000198952',\n", + " 'ENSG00000163472',\n", + " 'ENSG00000198715',\n", + " 'ENSG00000163468',\n", + " 'ENSG00000163467',\n", + " 'ENSG00000116604',\n", + " 'ENSG00000183856',\n", + " 'ENSG00000163382',\n", + " 'ENSG00000160818',\n", + " 'ENSG00000132688',\n", + " 'ENSG00000143320',\n", + " 'ENSG00000143319',\n", + " 'ENSG00000143303',\n", + " 'ENSG00000143314',\n", + " 'ENSG00000143321',\n", + " 'ENSG00000143294',\n", + " 'ENSG00000187800',\n", + " 'ENSG00000132694',\n", + " 'ENSG00000253831',\n", + " 'ENSG00000117036',\n", + " 'ENSG00000183853',\n", + " 'ENSG00000163565',\n", + " 'ENSG00000158716',\n", + " 'ENSG00000158710',\n", + " 'ENSG00000085552',\n", + " 'ENSG00000162729',\n", + " 'ENSG00000018625',\n", + " 'ENSG00000162734',\n", + " 'ENSG00000132716',\n", + " 'ENSG00000162735',\n", + " 'ENSG00000122218',\n", + " 'ENSG00000162736',\n", + " 'ENSG00000162738',\n", + " 'ENSG00000066294',\n", + " 'ENSG00000158769',\n", + " 'ENSG00000215845',\n", + " 'ENSG00000158773',\n", + " 'ENSG00000162755',\n", + " 'ENSG00000143256',\n", + " 'ENSG00000158793',\n", + " 'ENSG00000158796',\n", + " 'ENSG00000143222',\n", + " 'ENSG00000143258',\n", + " 'ENSG00000143224',\n", + " 'ENSG00000158850',\n", + " 'ENSG00000158859',\n", + " 'ENSG00000158864',\n", + " 'ENSG00000158869',\n", + " 'ENSG00000158882',\n", + " 'ENSG00000158887',\n", + " 'ENSG00000143252',\n", + " 'ENSG00000143226',\n", + " 'ENSG00000173110',\n", + " 'ENSG00000162746',\n", + " 'ENSG00000081721',\n", + " 'ENSG00000118217',\n", + " 'ENSG00000198929',\n", + " 'ENSG00000152332',\n", + " 'ENSG00000117143',\n", + " 'ENSG00000162733',\n", + " 'ENSG00000132196',\n", + " 'ENSG00000143228',\n", + " 'ENSG00000185630',\n", + " 'ENSG00000143171',\n", + " 'ENSG00000143198',\n", + " 'ENSG00000143149',\n", + " 'ENSG00000143183',\n", + " 'ENSG00000143179',\n", + " 'ENSG00000143157',\n", + " 'ENSG00000152382',\n", + " 'ENSG00000143190',\n", + " 'ENSG00000143162',\n", + " 'ENSG00000198771',\n", + " 'ENSG00000197965',\n", + " 'ENSG00000143158',\n", + " 'ENSG00000143164',\n", + " 'ENSG00000143147',\n", + " 'ENSG00000143155',\n", + " 'ENSG00000213064',\n", + " 'ENSG00000143178',\n", + " 'ENSG00000143153',\n", + " 'ENSG00000143156',\n", + " 'ENSG00000117475',\n", + " 'ENSG00000117479',\n", + " 'ENSG00000171806',\n", + " 'ENSG00000000457',\n", + " 'ENSG00000075945',\n", + " 'ENSG00000120370',\n", + " 'ENSG00000116132',\n", + " 'ENSG00000076258',\n", + " 'ENSG00000117523',\n", + " 'ENSG00000117533',\n", + " 'ENSG00000010165',\n", + " 'ENSG00000197959',\n", + " 'ENSG00000135845',\n", + " 'ENSG00000094975',\n", + " 'ENSG00000117592',\n", + " 'ENSG00000076321',\n", + " 'ENSG00000120334',\n", + " 'ENSG00000117593',\n", + " 'ENSG00000185278',\n", + " 'ENSG00000135870',\n", + " 'ENSG00000227373',\n", + " 'ENSG00000152061',\n", + " 'ENSG00000116161',\n", + " 'ENSG00000120333',\n", + " 'ENSG00000143207',\n", + " 'ENSG00000075391',\n", + " 'ENSG00000116191',\n", + " 'ENSG00000116194',\n", + " 'ENSG00000116199',\n", + " 'ENSG00000186283',\n", + " 'ENSG00000143322',\n", + " 'ENSG00000057252',\n", + " 'ENSG00000169905',\n", + " 'ENSG00000272906',\n", + " 'ENSG00000143337',\n", + " 'ENSG00000135837',\n", + " 'ENSG00000116260',\n", + " 'ENSG00000230124',\n", + " 'ENSG00000143324',\n", + " 'ENSG00000135835',\n", + " 'ENSG00000135823',\n", + " 'ENSG00000162783',\n", + " 'ENSG00000135821',\n", + " 'ENSG00000135828',\n", + " 'ENSG00000143333',\n", + " 'ENSG00000135838',\n", + " 'ENSG00000135829',\n", + " 'ENSG00000135862',\n", + " 'ENSG00000058085',\n", + " 'ENSG00000116698',\n", + " 'ENSG00000116701',\n", + " 'ENSG00000162704',\n", + " 'ENSG00000143344',\n", + " 'ENSG00000198756',\n", + " 'ENSG00000198860',\n", + " 'ENSG00000271387',\n", + " 'ENSG00000116406',\n", + " 'ENSG00000135842',\n", + " 'ENSG00000121481',\n", + " 'ENSG00000121486',\n", + " 'ENSG00000116668',\n", + " 'ENSG00000116679',\n", + " 'ENSG00000143341',\n", + " 'ENSG00000047410',\n", + " 'ENSG00000116711',\n", + " 'ENSG00000162670',\n", + " 'ENSG00000116741',\n", + " 'ENSG00000116750',\n", + " 'ENSG00000116747',\n", + " 'ENSG00000023572',\n", + " 'ENSG00000134371',\n", + " 'ENSG00000162630',\n", + " 'ENSG00000162687',\n", + " 'ENSG00000000971',\n", + " 'ENSG00000066279',\n", + " 'ENSG00000177888',\n", + " 'ENSG00000213047',\n", + " 'ENSG00000151414',\n", + " 'ENSG00000229989',\n", + " 'ENSG00000162702',\n", + " 'ENSG00000118193',\n", + " 'ENSG00000118197',\n", + " 'ENSG00000260088',\n", + " 'ENSG00000118200',\n", + " 'ENSG00000116852',\n", + " 'ENSG00000116857',\n", + " 'ENSG00000174307',\n", + " 'ENSG00000159176',\n", + " 'ENSG00000134369',\n", + " 'ENSG00000198700',\n", + " 'ENSG00000198892',\n", + " 'ENSG00000163431',\n", + " 'ENSG00000134375',\n", + " 'ENSG00000176393',\n", + " 'ENSG00000163435',\n", + " 'ENSG00000143862',\n", + " 'ENSG00000077152',\n", + " 'ENSG00000077157',\n", + " 'ENSG00000117139',\n", + " 'ENSG00000183155',\n", + " 'ENSG00000117153',\n", + " 'ENSG00000159346',\n", + " 'ENSG00000159348',\n", + " 'ENSG00000163444',\n", + " 'ENSG00000143847',\n", + " 'ENSG00000159388',\n", + " 'ENSG00000122176',\n", + " 'ENSG00000188783',\n", + " 'ENSG00000058668',\n", + " ...]" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.get_var_names()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/canergen/Documents/scvi-tools/src/scvi/model/base/_archesmixin.py:118: UserWarning: `var_names` for the loaded `adata` does not match those of the `adata` used to train the model. For valid results, the former should match the latter.\n", + " _validate_var_names(adata, var_names)\n", + "/home/canergen/Documents/scvi-tools/src/scvi/model/base/_archesmixin.py:123: UserWarning: `var_names` for the loaded `adata` does not match those of the `adata` used to train the model. For valid results, the former should match the latter.\n", + " _validate_var_names(adata, var_names)\n" + ] + }, + { + "ename": "AttributeError", + "evalue": "'DataFrame' object has no attribute 'soma_joinid'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_1006776/561006111.py\u001b[0m in \u001b[0;36m?\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_query_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0madata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mad\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreference_model\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/Documents/scvi-tools/src/scvi/model/base/_archesmixin.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(cls, adata, reference_model, registry, inplace_subset_query_vars, accelerator, device, unfrozen, freeze_dropout, freeze_expression, freeze_decoder_first_layer, freeze_batchnorm_encoder, freeze_batchnorm_decoder, freeze_classifier)\u001b[0m\n\u001b[1;32m 132\u001b[0m \u001b[0;34m\"Cannot load the original setup.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 133\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 135\u001b[0m \u001b[0msetup_method\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mregistry\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0m_SETUP_METHOD_NAME\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 136\u001b[0;31m setup_method(\n\u001b[0m\u001b[1;32m 137\u001b[0m \u001b[0madata\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[0msource_registry\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mregistry\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 139\u001b[0m \u001b[0mextend_categories\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Documents/scvi-tools/src/scvi/model/_scvi.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(cls, datamodule, source_registry, layer, batch_key, labels_key, size_factor_key, categorical_covariate_keys, continuous_covariate_keys, **kwargs)\u001b[0m\n\u001b[1;32m 286\u001b[0m \u001b[0;31m# it means we init the custom dataloder model with anndata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 287\u001b[0m categorical_mapping = source_registry[\"field_registries\"][\"batch\"][\"state_registry\"][\n\u001b[1;32m 288\u001b[0m \u001b[0;34m\"categorical_mapping\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 289\u001b[0m \u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 290\u001b[0;31m \u001b[0mcolumn_names\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdatamodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvar\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msoma_joinid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 291\u001b[0m \u001b[0mn_batch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msource_registry\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"field_registries\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"batch\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"summary_stats\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"n_batch\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 293\u001b[0m datamodule.registry = {\n", + "\u001b[0;32m~/.local/share/hatch/env/virtual/scvi-tools/SQ6yRUPo/scvi-tools/lib/python3.12/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 6295\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_accessors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6296\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_can_hold_identifiers_and_holds_name\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6297\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6298\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 6299\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'soma_joinid'" + ] + } + ], + "source": [ + "model2 = model.load_query_data(adata=ad, reference_model=model)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34mINFO \u001b[0m Input AnnData not setup with scvi-tools. attempting to transfer AnnData setup \n" + ] + }, + { + "ename": "AttributeError", + "evalue": "'NoneType' object has no attribute 'transfer_fields'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[34], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m b \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_reconstruction_error\u001b[49m\u001b[43m(\u001b[49m\u001b[43madata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mad\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.local/share/hatch/env/virtual/scvi-tools/SQ6yRUPo/scvi-tools/lib/python3.12/site-packages/torch/utils/_contextlib.py:116\u001b[0m, in \u001b[0;36mcontext_decorator.
\n", + " | Celltypes | \n", + "Loc_true | \n", + "suspension_type | \n", + "scsn | \n", + "donor_id | \n", + "Gender | \n", + "Sample | \n", + "ID | \n", + "assay_ontology_term_id | \n", + "Study | \n", + "n_genes | \n", + "n_genes_by_counts | \n", + "total_counts | \n", + "total_counts_mt | \n", + "pct_counts_mt | \n", + "PoolDon | \n", + "DonorPool | \n", + "Protocol_plot | \n", + "scDonor_snBatch | \n", + "is_primary_data | \n", + "disease_ontology_term_id | \n", + "organism_ontology_term_id | \n", + "donor_id_2 | \n", + "sex_ontology_term_id | \n", + "self_reported_ethnicity_ontology_term_id | \n", + "Age range | \n", + "Smoking status | \n", + "Years smoking | \n", + "BMI range | \n", + "development_stage_ontology_term_id | \n", + "Location_long | \n", + "Cell_fraction | \n", + "tissue_ontology_term_id | \n", + "cell_type_ontology_term_id | \n", + "tissue_type | \n", + "cell_type | \n", + "assay | \n", + "disease | \n", + "organism | \n", + "sex | \n", + "tissue | \n", + "self_reported_ethnicity | \n", + "development_stage | \n", + "observation_joinid | \n", + "nCount_RNA | \n", + "nFeature_RNA | \n", + "smoking | \n", + "packyears | \n", + "percent.mito | \n", + "batch | \n", + "dissection | \n", + "chemistry | \n", + "percent_mito | \n", + "n_counts | \n", + "leiden | \n", + "phase | \n", + "S_score | \n", + "G2M_score | \n", + "new_celltype | \n", + "score | \n", + "log1p_n_genes_by_counts | \n", + "log1p_total_counts | \n", + "mito_frac | \n", + "RBP_frac | \n", + "treatment | \n", + "histo | \n", + "procedure | \n", + "clusters | \n", + "author_cell_type | \n", + "clusters_fine | \n", + "HTAN_Biospecimen_ID | \n", + "HTAN_Participant_ID | \n", + "seurat_clusters | \n", + "celltype | \n", + "lineage | \n", + "nGene | \n", + "nReads | \n", + "plate.barcode | \n", + "cell.id | \n", + "region | \n", + "label | \n", + "sorter | \n", + "sort.location | \n", + "sample | \n", + "location | \n", + "percent.ercc | \n", + "percent.ribo | \n", + "gating | \n", + "free_annotation | \n", + "Number of splices: Total | \n", + "Number of splices: Annotated (sjdb) | \n", + "Number of splices: GT-AG | \n", + "Number of splices: GC-AG | \n", + "Number of splices: AT-AC | \n", + "Number of splices: Non-canonical | \n", + "Mapping speed, Million of reads per hour | \n", + "Average input read length | \n", + "compartment | \n", + "artifact_uid | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
WTDAtest7887999-GGGAGATGTGAGTGAC | \n", + "B_memory | \n", + "d_LowLeftPar | \n", + "cell | \n", + "cells | \n", + "A32 | \n", + "M | \n", + "WTDAtest7887999 | \n", + "A32-LNG-1-SC-45N-1 | \n", + "EFO:0009899 | \n", + "dissociation | \n", + "655.0 | \n", + "655.0 | \n", + "1676.563354 | \n", + "50.247673 | \n", + "2.997064 | \n", + "nan | \n", + "nan | \n", + "LibCD45neg_TrypLibUndigest | \n", + "A32_cells | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "411C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "25-30 | \n", + "current | \n", + "15.0 | \n", + "20-24 | \n", + "HsapDv:0000123 | \n", + "Lower Left Lobe | \n", + "CD45neg and Liberase undigested | \n", + "UBERON:0008953 | \n", + "CL:0000787 | \n", + "tissue | \n", + "memory B cell | \n", + "10x 3' v2 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "lower lobe of left lung | \n", + "European | \n", + "29-year-old human stage | \n", + "sj#d$bsNrL | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "qOw3M7KtZ0QjEEtUcSy9 | \n", + "
WTDAtest7888001-CTAGAGTCAAGTTCTG | \n", + "B_plasma_IgG | \n", + "a_Trachea | \n", + "cell | \n", + "cells | \n", + "A32 | \n", + "M | \n", + "WTDAtest7888001 | \n", + "A32-LNG-2-SC-45N-1 | \n", + "EFO:0009899 | \n", + "dissociation | \n", + "1052.0 | \n", + "1052.0 | \n", + "14591.662109 | \n", + "1.973254 | \n", + "0.013523 | \n", + "nan | \n", + "nan | \n", + "LibCD45neg_TrypLibUndigest | \n", + "A32_cells | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "411C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "25-30 | \n", + "current | \n", + "15.0 | \n", + "20-24 | \n", + "HsapDv:0000123 | \n", + "Trachea | \n", + "CD45neg and Liberase undigested | \n", + "UBERON:0003126 | \n", + "CL:0000985 | \n", + "tissue | \n", + "IgG plasma cell | \n", + "10x 3' v2 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "trachea | \n", + "European | \n", + "29-year-old human stage | \n", + "(xklu`s@kK | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "qOw3M7KtZ0QjEEtUcSy9 | \n", + "
WTDAtest7888001-TACTTGTAGGACGAAA | \n", + "B_plasma_IgG | \n", + "a_Trachea | \n", + "cell | \n", + "cells | \n", + "A32 | \n", + "M | \n", + "WTDAtest7888001 | \n", + "A32-LNG-2-SC-45N-1 | \n", + "EFO:0009899 | \n", + "dissociation | \n", + "1346.0 | \n", + "1346.0 | \n", + "10357.476562 | \n", + "10.057343 | \n", + "0.097102 | \n", + "nan | \n", + "nan | \n", + "LibCD45neg_TrypLibUndigest | \n", + "A32_cells | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "411C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "25-30 | \n", + "current | \n", + "15.0 | \n", + "20-24 | \n", + "HsapDv:0000123 | \n", + "Trachea | \n", + "CD45neg and Liberase undigested | \n", + "UBERON:0003126 | \n", + "CL:0000985 | \n", + "tissue | \n", + "IgG plasma cell | \n", + "10x 3' v2 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "trachea | \n", + "European | \n", + "29-year-old human stage | \n", + "PyS2QZSS6l | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "qOw3M7KtZ0QjEEtUcSy9 | \n", + "
WTDAtest7888002-AAACGGGCAACCGCCA | \n", + "B_plasma_IgA | \n", + "a_Trachea | \n", + "cell | \n", + "cells | \n", + "A32 | \n", + "M | \n", + "WTDAtest7888002 | \n", + "A32-LNG-2-SC-45P-1 | \n", + "EFO:0009899 | \n", + "dissociation | \n", + "1612.0 | \n", + "1612.0 | \n", + "15639.706055 | \n", + "39.134060 | \n", + "0.250222 | \n", + "nan | \n", + "nan | \n", + "LibCD45pos | \n", + "A32_cells | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "411C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "25-30 | \n", + "current | \n", + "15.0 | \n", + "20-24 | \n", + "HsapDv:0000123 | \n", + "Trachea | \n", + "CD45pos | \n", + "UBERON:0003126 | \n", + "CL:0000987 | \n", + "tissue | \n", + "IgA plasma cell | \n", + "10x 3' v2 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "trachea | \n", + "European | \n", + "29-year-old human stage | \n", + "B!$?ygWp`e | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "qOw3M7KtZ0QjEEtUcSy9 | \n", + "
WTDAtest7888002-CACACCTAGTGTACCT | \n", + "B_plasma_IgA | \n", + "a_Trachea | \n", + "cell | \n", + "cells | \n", + "A32 | \n", + "M | \n", + "WTDAtest7888002 | \n", + "A32-LNG-2-SC-45P-1 | \n", + "EFO:0009899 | \n", + "dissociation | \n", + "1360.0 | \n", + "1360.0 | \n", + "8899.969727 | \n", + "97.390213 | \n", + "1.094276 | \n", + "nan | \n", + "nan | \n", + "LibCD45pos | \n", + "A32_cells | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "411C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "25-30 | \n", + "current | \n", + "15.0 | \n", + "20-24 | \n", + "HsapDv:0000123 | \n", + "Trachea | \n", + "CD45pos | \n", + "UBERON:0003126 | \n", + "CL:0000987 | \n", + "tissue | \n", + "IgA plasma cell | \n", + "10x 3' v2 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "trachea | \n", + "European | \n", + "29-year-old human stage | \n", + "5%Alz6_H+> | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "qOw3M7KtZ0QjEEtUcSy9 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
WSSS_A_LNG8757929-TTTGGTTCACAAGCAG | \n", + "SMG_Mucous | \n", + "a_Trachea | \n", + "nucleus | \n", + "nuclei | \n", + "A42 | \n", + "nan | \n", + "WSSS_A_LNG8757929 | \n", + "WSSS_A_LNG8757929 | \n", + "EFO:0009922 | \n", + "nan | \n", + "3025.0 | \n", + "3025.0 | \n", + "7233.732910 | \n", + "0.000000 | \n", + "0.000000 | \n", + "A42 | \n", + "A42none | \n", + "nuclei | \n", + "A42_nuclei | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "456C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "60-64 | \n", + "unknown | \n", + "NaN | \n", + "30-34 | \n", + "HsapDv:0000241 | \n", + "Bronchi 2-3 divison without surrounding parenc... | \n", + "All cells | \n", + "UBERON:0002185 | \n", + "CL:1000272 | \n", + "tissue | \n", + "lung secretory cell | \n", + "10x 3' v3 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "bronchus | \n", + "European | \n", + "seventh decade human stage | \n", + "{>Uy5Gwpj3 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "5VCheRCxgdRWtDnBNVQC | \n", + "
WSSS_A_LNG8757929-TTTGGTTCAGCTTCGG | \n", + "SMG_Mucous | \n", + "a_Trachea | \n", + "nucleus | \n", + "nuclei | \n", + "A42 | \n", + "nan | \n", + "WSSS_A_LNG8757929 | \n", + "WSSS_A_LNG8757929 | \n", + "EFO:0009922 | \n", + "nan | \n", + "2131.0 | \n", + "2131.0 | \n", + "4634.926270 | \n", + "0.000000 | \n", + "0.000000 | \n", + "A42 | \n", + "A42none | \n", + "nuclei | \n", + "A42_nuclei | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "456C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "60-64 | \n", + "unknown | \n", + "NaN | \n", + "30-34 | \n", + "HsapDv:0000241 | \n", + "Bronchi 2-3 divison without surrounding parenc... | \n", + "All cells | \n", + "UBERON:0002185 | \n", + "CL:1000272 | \n", + "tissue | \n", + "lung secretory cell | \n", + "10x 3' v3 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "bronchus | \n", + "European | \n", + "seventh decade human stage | \n", + "2Ifg`5A3~k | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "5VCheRCxgdRWtDnBNVQC | \n", + "
WSSS_A_LNG8757929-TTTGGTTGTGCCCTTT | \n", + "SMG_Mucous | \n", + "a_Trachea | \n", + "nucleus | \n", + "nuclei | \n", + "A42 | \n", + "nan | \n", + "WSSS_A_LNG8757929 | \n", + "WSSS_A_LNG8757929 | \n", + "EFO:0009922 | \n", + "nan | \n", + "2723.0 | \n", + "2723.0 | \n", + "7334.214355 | \n", + "0.000000 | \n", + "0.000000 | \n", + "A42 | \n", + "A42none | \n", + "nuclei | \n", + "A42_nuclei | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "456C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "60-64 | \n", + "unknown | \n", + "NaN | \n", + "30-34 | \n", + "HsapDv:0000241 | \n", + "Bronchi 2-3 divison without surrounding parenc... | \n", + "All cells | \n", + "UBERON:0002185 | \n", + "CL:1000272 | \n", + "tissue | \n", + "lung secretory cell | \n", + "10x 3' v3 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "bronchus | \n", + "European | \n", + "seventh decade human stage | \n", + "kmxB0`Q`cQ | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "5VCheRCxgdRWtDnBNVQC | \n", + "
WSSS_A_LNG8757929-TTTGGTTTCAAGAGTA | \n", + "Basal | \n", + "a_Trachea | \n", + "nucleus | \n", + "nuclei | \n", + "A42 | \n", + "nan | \n", + "WSSS_A_LNG8757929 | \n", + "WSSS_A_LNG8757929 | \n", + "EFO:0009922 | \n", + "nan | \n", + "500.0 | \n", + "500.0 | \n", + "626.389465 | \n", + "0.000000 | \n", + "0.000000 | \n", + "A42 | \n", + "A42none | \n", + "nuclei | \n", + "A42_nuclei | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "456C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "60-64 | \n", + "unknown | \n", + "NaN | \n", + "30-34 | \n", + "HsapDv:0000241 | \n", + "Bronchi 2-3 divison without surrounding parenc... | \n", + "All cells | \n", + "UBERON:0002185 | \n", + "CL:0000646 | \n", + "tissue | \n", + "basal cell | \n", + "10x 3' v3 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "bronchus | \n", + "European | \n", + "seventh decade human stage | \n", + "R_svVa3weW | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "5VCheRCxgdRWtDnBNVQC | \n", + "
WSSS_A_LNG8757929-TTTGTTGAGCGAGTCA | \n", + "SMG_Mucous | \n", + "a_Trachea | \n", + "nucleus | \n", + "nuclei | \n", + "A42 | \n", + "nan | \n", + "WSSS_A_LNG8757929 | \n", + "WSSS_A_LNG8757929 | \n", + "EFO:0009922 | \n", + "nan | \n", + "2576.0 | \n", + "2576.0 | \n", + "6651.600586 | \n", + "0.000000 | \n", + "0.000000 | \n", + "A42 | \n", + "A42none | \n", + "nuclei | \n", + "A42_nuclei | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "456C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "60-64 | \n", + "unknown | \n", + "NaN | \n", + "30-34 | \n", + "HsapDv:0000241 | \n", + "Bronchi 2-3 divison without surrounding parenc... | \n", + "All cells | \n", + "UBERON:0002185 | \n", + "CL:1000272 | \n", + "tissue | \n", + "lung secretory cell | \n", + "10x 3' v3 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "bronchus | \n", + "European | \n", + "seventh decade human stage | \n", + "qEapk053ET | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "5VCheRCxgdRWtDnBNVQC | \n", + "
343281 rows × 99 columns
\n", + "SCVI model with the following parameters: \n", + "n_hidden: 128, n_latent: 10, n_layers: 1, dropout_rate: 0.1, dispersion: gene, gene_likelihood: zinb, \n", + "latent_distribution: normal.\n", + "Training status: Not Trained\n", + "Model's adata is minified?: False\n", + "\n" + ], + "text/plain": [ + "SCVI model with the following parameters: \n", + "n_hidden: \u001b[1;36m128\u001b[0m, n_latent: \u001b[1;36m10\u001b[0m, n_layers: \u001b[1;36m1\u001b[0m, dropout_rate: \u001b[1;36m0.1\u001b[0m, dispersion: gene, gene_likelihood: zinb, \n", + "latent_distribution: normal.\n", + "Training status: Not Trained\n", + "Model's adata is minified?: \u001b[3;91mFalse\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.load(\"lamin_model\", adata=False)\n", + "model.load_query_data(adata=False, reference_model=\"lamin_model\", registry=datamodule.registry)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", + "/home/canergen/.local/share/hatch/env/virtual/scvi-tools/SQ6yRUPo/scvi-tools/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f5fc7b7faf684ca4a57c958e0564d3ec", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: 0%| | 0/1 [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/canergen/Documents/scvi-tools/src/scvi/module/_vae.py:569: UserWarning: The value argument must be within the support of the distribution\n", + " reconst_loss = -generative_outputs[MODULE_KEYS.PX_KEY].log_prob(x).sum(-1)\n", + "/home/canergen/Documents/scvi-tools/src/scvi/module/_vae.py:569: UserWarning: The value argument must be within the support of the distribution\n", + " reconst_loss = -generative_outputs[MODULE_KEYS.PX_KEY].log_prob(x).sum(-1)\n", + "`Trainer.fit` stopped: `max_epochs=1` reached.\n" + ] + } + ], + "source": [ + "model2.train(max_epochs=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34mINFO \u001b[0m File lamin_model/model.pt already downloaded \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TTTTTTTT setup_anndata\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/canergen/Documents/scvi-tools/src/scvi/data/fields/_base_field.py:63: UserWarning: adata.X does not contain unnormalized count data. Are you sure this is what you want?\n", + " self.validate_field(adata)\n" + ] + } + ], + "source": [ + "model3 = model.load(\"lamin_model\", adata=ad)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "HPU available: False, using: 0 HPUs\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", + "/home/canergen/.local/share/hatch/env/virtual/scvi-tools/SQ6yRUPo/scvi-tools/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "97b3d8aa95994b059faead86ce8f4872", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: 0%| | 0/1 [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/canergen/Documents/scvi-tools/src/scvi/module/_vae.py:569: UserWarning: The value argument must be within the support of the distribution\n", + " reconst_loss = -generative_outputs[MODULE_KEYS.PX_KEY].log_prob(x).sum(-1)\n", + "/home/canergen/Documents/scvi-tools/src/scvi/module/_vae.py:569: UserWarning: The value argument must be within the support of the distribution\n", + " reconst_loss = -generative_outputs[MODULE_KEYS.PX_KEY].log_prob(x).sum(-1)\n", + "`Trainer.fit` stopped: `max_epochs=1` reached.\n" + ] + } + ], + "source": [ + "model3.train(max_epochs=1)\n", + "model3.save(\"lamin_model_anndata\", save_anndata=False, overwrite=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34mINFO \u001b[0m File lamin_model_anndata/model.pt already downloaded \n", + "\u001b[34mINFO \u001b[0m File lamin_model_anndata/model.pt already downloaded \n" + ] + }, + { + "data": { + "text/html": [ + "
SCVI model with the following parameters: \n", + "n_hidden: 128, n_latent: 10, n_layers: 1, dropout_rate: 0.1, dispersion: gene, gene_likelihood: zinb, \n", + "latent_distribution: normal.\n", + "Training status: Not Trained\n", + "Model's adata is minified?: False\n", + "\n" + ], + "text/plain": [ + "SCVI model with the following parameters: \n", + "n_hidden: \u001b[1;36m128\u001b[0m, n_latent: \u001b[1;36m10\u001b[0m, n_layers: \u001b[1;36m1\u001b[0m, dropout_rate: \u001b[1;36m0.1\u001b[0m, dispersion: gene, gene_likelihood: zinb, \n", + "latent_distribution: normal.\n", + "Training status: Not Trained\n", + "Model's adata is minified?: \u001b[3;91mFalse\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "dataloader = datamodule.inference_dataloader()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# _ = model.get_elbo(dataloader=dataloader)\n", + "# _ = model.get_marginal_ll(dataloader=dataloader)\n", + "# _ = model.get_reconstruction_error(dataloader=dataloader)\n", + "_ = model.get_latent_representation(dataloader=dataloader)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34mINFO \u001b[0m File lamin_model/model.pt already downloaded \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/canergen/Documents/scvi-tools/src/scvi/model/base/_base_model.py:796: UserWarning: `var_names` for the loaded `model` does not match those used to train the model. For valid results, the former should match the latter.\n", + " _validate_var_names(adata, var_names)\n" + ] + }, + { + "ename": "ValueError", + "evalue": "Please set up your AnnData with SCVI.setup_anndata first.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[23], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlamin_model\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43madata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mad\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/model/base/_base_model.py:800\u001b[0m, in \u001b[0;36mBaseModelClass.load\u001b[0;34m(cls, dir_path, adata, accelerator, device, prefix, backup_url)\u001b[0m\n\u001b[1;32m 797\u001b[0m method_name \u001b[38;5;241m=\u001b[39m registry\u001b[38;5;241m.\u001b[39mget(_SETUP_METHOD_NAME, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msetup_anndata\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 798\u001b[0m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mcls\u001b[39m, method_name)(adata, source_registry\u001b[38;5;241m=\u001b[39mregistry, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mregistry[_SETUP_ARGS_KEY])\n\u001b[0;32m--> 800\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43m_initialize_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43madata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mregistry\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mattr_dict\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 801\u001b[0m model\u001b[38;5;241m.\u001b[39mmodule\u001b[38;5;241m.\u001b[39mon_load(model)\n\u001b[1;32m 802\u001b[0m model\u001b[38;5;241m.\u001b[39mmodule\u001b[38;5;241m.\u001b[39mload_state_dict(model_state_dict)\n", + "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/model/base/_save_load.py:140\u001b[0m, in \u001b[0;36m_initialize_model\u001b[0;34m(cls, adata, registry, attr_dict)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m adata:\n\u001b[1;32m 138\u001b[0m adata \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 140\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43madata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mregistry\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mregistry\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mnon_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 141\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m attr, val \u001b[38;5;129;01min\u001b[39;00m attr_dict\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m 142\u001b[0m \u001b[38;5;28msetattr\u001b[39m(model, attr, val)\n", + "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/model/_scvi.py:130\u001b[0m, in \u001b[0;36mSCVI.__init__\u001b[0;34m(self, adata, registry, n_hidden, n_latent, n_layers, dropout_rate, dispersion, gene_likelihood, latent_distribution, datamodule, **kwargs)\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 117\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 118\u001b[0m adata: AnnData \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 129\u001b[0m ):\n\u001b[0;32m--> 130\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43madata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mregistry\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 132\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_module_kwargs \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 133\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mn_hidden\u001b[39m\u001b[38;5;124m\"\u001b[39m: n_hidden,\n\u001b[1;32m 134\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mn_latent\u001b[39m\u001b[38;5;124m\"\u001b[39m: n_latent,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 141\u001b[0m }\n\u001b[1;32m 142\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_model_summary_string \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 143\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSCVI model with the following parameters: \u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 144\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mn_hidden: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mn_hidden\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, n_latent: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mn_latent\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, n_layers: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mn_layers\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdropout_rate: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdropout_rate\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, dispersion: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdispersion\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 146\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgene_likelihood: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mgene_likelihood\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, latent_distribution: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mlatent_distribution\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 147\u001b[0m )\n", + "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/model/base/_base_model.py:125\u001b[0m, in \u001b[0;36mBaseModelClass.__init__\u001b[0;34m(self, adata, registry)\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m adata \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_adata \u001b[38;5;241m=\u001b[39m adata\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_adata_manager \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_most_recent_anndata_manager\u001b[49m\u001b[43m(\u001b[49m\u001b[43madata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrequired\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 126\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_register_manager_for_instance(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39madata_manager)\n\u001b[1;32m 127\u001b[0m \u001b[38;5;66;03m# Suffix registry instance variable with _ to include it when saving the model.\u001b[39;00m\n", + "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/model/base/_base_model.py:360\u001b[0m, in \u001b[0;36mBaseModelClass._get_most_recent_anndata_manager\u001b[0;34m(cls, adata, required)\u001b[0m\n\u001b[1;32m 358\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _SCVI_UUID_KEY \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m adata\u001b[38;5;241m.\u001b[39muns:\n\u001b[1;32m 359\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m required:\n\u001b[0;32m--> 360\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 361\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease set up your AnnData with \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.setup_anndata first.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 362\u001b[0m )\n\u001b[1;32m 363\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 365\u001b[0m adata_id \u001b[38;5;241m=\u001b[39m adata\u001b[38;5;241m.\u001b[39muns[_SCVI_UUID_KEY]\n", + "\u001b[0;31mValueError\u001b[0m: Please set up your AnnData with SCVI.setup_anndata first." + ] + } + ], + "source": [ + "model.load(\"lamin_model\", adata=ad)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.registry[\"field_registries\"][\"X\"][\"state_registry\"][\"column_names\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ENSG00000237491',\n", + " 'ENSG00000225880',\n", + " 'ENSG00000230368',\n", + " 'ENSG00000187634',\n", + " 'ENSG00000188976',\n", + " 'ENSG00000187961',\n", + " 'ENSG00000188290',\n", + " 'ENSG00000187608',\n", + " 'ENSG00000188157',\n", + " 'ENSG00000078808',\n", + " 'ENSG00000176022',\n", + " 'ENSG00000160087',\n", + " 'ENSG00000162572',\n", + " 'ENSG00000131584',\n", + " 'ENSG00000169972',\n", + " 'ENSG00000127054',\n", + " 'ENSG00000224051',\n", + " 'ENSG00000107404',\n", + " 'ENSG00000162576',\n", + " 'ENSG00000175756',\n", + " 'ENSG00000221978',\n", + " 'ENSG00000242485',\n", + " 'ENSG00000272455',\n", + " 'ENSG00000235098',\n", + " 'ENSG00000179403',\n", + " 'ENSG00000160072',\n", + " 'ENSG00000197785',\n", + " 'ENSG00000160075',\n", + " 'ENSG00000215014',\n", + " 'ENSG00000228594',\n", + " 'ENSG00000197530',\n", + " 'ENSG00000189409',\n", + " 'ENSG00000248333',\n", + " 'ENSG00000189339',\n", + " 'ENSG00000008128',\n", + " 'ENSG00000215790',\n", + " 'ENSG00000008130',\n", + " 'ENSG00000078369',\n", + " 'ENSG00000178821',\n", + " 'ENSG00000067606',\n", + " 'ENSG00000162585',\n", + " 'ENSG00000157933',\n", + " 'ENSG00000116151',\n", + " 'ENSG00000157916',\n", + " 'ENSG00000157911',\n", + " 'ENSG00000272449',\n", + " 'ENSG00000162591',\n", + " 'ENSG00000158109',\n", + " 'ENSG00000116213',\n", + " 'ENSG00000078900',\n", + " 'ENSG00000235169',\n", + " 'ENSG00000130764',\n", + " 'ENSG00000116198',\n", + " 'ENSG00000169598',\n", + " 'ENSG00000131697',\n", + " 'ENSG00000069424',\n", + " 'ENSG00000116251',\n", + " 'ENSG00000116237',\n", + " 'ENSG00000158292',\n", + " 'ENSG00000097021',\n", + " 'ENSG00000187017',\n", + " 'ENSG00000215788',\n", + " 'ENSG00000171680',\n", + " 'ENSG00000162408',\n", + " 'ENSG00000204859',\n", + " 'ENSG00000162413',\n", + " 'ENSG00000116273',\n", + " 'ENSG00000041988',\n", + " 'ENSG00000007923',\n", + " 'ENSG00000171735',\n", + " 'ENSG00000049245',\n", + " 'ENSG00000049246',\n", + " 'ENSG00000116288',\n", + " 'ENSG00000116285',\n", + " 'ENSG00000142599',\n", + " 'ENSG00000074800',\n", + " 'ENSG00000180758',\n", + " 'ENSG00000049239',\n", + " 'ENSG00000171621',\n", + " 'ENSG00000171612',\n", + " 'ENSG00000188807',\n", + " 'ENSG00000171608',\n", + " 'ENSG00000171603',\n", + " 'ENSG00000178585',\n", + " 'ENSG00000162441',\n", + " 'ENSG00000173614',\n", + " 'ENSG00000130939',\n", + " 'ENSG00000054523',\n", + " 'ENSG00000142657',\n", + " 'ENSG00000251503',\n", + " 'ENSG00000160049',\n", + " 'ENSG00000142655',\n", + " 'ENSG00000130940',\n", + " 'ENSG00000120948',\n", + " 'ENSG00000116649',\n", + " 'ENSG00000171824',\n", + " 'ENSG00000198793',\n", + " 'ENSG00000120942',\n", + " 'ENSG00000204624',\n", + " 'ENSG00000116661',\n", + " 'ENSG00000132879',\n", + " 'ENSG00000116663',\n", + " 'ENSG00000116670',\n", + " 'ENSG00000177674',\n", + " 'ENSG00000177000',\n", + " 'ENSG00000011021',\n", + " 'ENSG00000116685',\n", + " 'ENSG00000083444',\n", + " 'ENSG00000116688',\n", + " 'ENSG00000116691',\n", + " 'ENSG00000028137',\n", + " 'ENSG00000048707',\n", + " 'ENSG00000162496',\n", + " 'ENSG00000162493',\n", + " 'ENSG00000116731',\n", + " 'ENSG00000189337',\n", + " 'ENSG00000171729',\n", + " 'ENSG00000142634',\n", + " 'ENSG00000132906',\n", + " 'ENSG00000116138',\n", + " 'ENSG00000197312',\n", + " 'ENSG00000116786',\n", + " 'ENSG00000162461',\n", + " 'ENSG00000162458',\n", + " 'ENSG00000065526',\n", + " 'ENSG00000116809',\n", + " 'ENSG00000142627',\n", + " 'ENSG00000142632',\n", + " 'ENSG00000132881',\n", + " 'ENSG00000037637',\n", + " 'ENSG00000055070',\n", + " 'ENSG00000157191',\n", + " 'ENSG00000261135',\n", + " 'ENSG00000224174',\n", + " 'ENSG00000219481',\n", + " 'ENSG00000058453',\n", + " 'ENSG00000238142',\n", + " 'ENSG00000272426',\n", + " 'ENSG00000117122',\n", + " 'ENSG00000159363',\n", + " 'ENSG00000117118',\n", + " 'ENSG00000169991',\n", + " 'ENSG00000127481',\n", + " 'ENSG00000127463',\n", + " 'ENSG00000053372',\n", + " 'ENSG00000053371',\n", + " 'ENSG00000040487',\n", + " 'ENSG00000077549',\n", + " 'ENSG00000158747',\n", + " 'ENSG00000162542',\n", + " 'ENSG00000169914',\n", + " 'ENSG00000162543',\n", + " 'ENSG00000162545',\n", + " 'ENSG00000090432',\n", + " 'ENSG00000158828',\n", + " 'ENSG00000244038',\n", + " 'ENSG00000127483',\n", + " 'ENSG00000075151',\n", + " 'ENSG00000117298',\n", + " 'ENSG00000142794',\n", + " 'ENSG00000090686',\n", + " 'ENSG00000142798',\n", + " 'ENSG00000070831',\n", + " 'ENSG00000184677',\n", + " 'ENSG00000173372',\n", + " 'ENSG00000159189',\n", + " 'ENSG00000173369',\n", + " 'ENSG00000133216',\n", + " 'ENSG00000004487',\n", + " 'ENSG00000169641',\n", + " 'ENSG00000142676',\n", + " 'ENSG00000011007',\n", + " 'ENSG00000057757',\n", + " 'ENSG00000011009',\n", + " 'ENSG00000117308',\n", + " 'ENSG00000117305',\n", + " 'ENSG00000179163',\n", + " 'ENSG00000189266',\n", + " 'ENSG00000188529',\n", + " 'ENSG00000185436',\n", + " 'ENSG00000001460',\n", + " 'ENSG00000001461',\n", + " 'ENSG00000117602',\n", + " 'ENSG00000133226',\n", + " 'ENSG00000169504',\n", + " 'ENSG00000117614',\n", + " 'ENSG00000117616',\n", + " 'ENSG00000183726',\n", + " 'ENSG00000188672',\n", + " 'ENSG00000204178',\n", + " 'ENSG00000157978',\n", + " 'ENSG00000117643',\n", + " 'ENSG00000162430',\n", + " 'ENSG00000117640',\n", + " 'ENSG00000127423',\n", + " 'ENSG00000182749',\n", + " 'ENSG00000117632',\n", + " 'ENSG00000158006',\n", + " 'ENSG00000158008',\n", + " 'ENSG00000175087',\n", + " 'ENSG00000142684',\n", + " 'ENSG00000142675',\n", + " 'ENSG00000130695',\n", + " 'ENSG00000142669',\n", + " 'ENSG00000158062',\n", + " 'ENSG00000117682',\n", + " 'ENSG00000198830',\n", + " 'ENSG00000117713',\n", + " 'ENSG00000060642',\n", + " 'ENSG00000204160',\n", + " 'ENSG00000142751',\n", + " 'ENSG00000198746',\n", + " 'ENSG00000090273',\n", + " 'ENSG00000158246',\n", + " 'ENSG00000090020',\n", + " 'ENSG00000142784',\n", + " 'ENSG00000186501',\n", + " 'ENSG00000142765',\n", + " 'ENSG00000142733',\n", + " 'ENSG00000158195',\n", + " 'ENSG00000126705',\n", + " 'ENSG00000126709',\n", + " 'ENSG00000009780',\n", + " 'ENSG00000117758',\n", + " 'ENSG00000117751',\n", + " 'ENSG00000130775',\n", + " 'ENSG00000117748',\n", + " 'ENSG00000130768',\n", + " 'ENSG00000158156',\n", + " 'ENSG00000158161',\n", + " 'ENSG00000126698',\n", + " 'ENSG00000204138',\n", + " 'ENSG00000180198',\n", + " 'ENSG00000180098',\n", + " 'ENSG00000120656',\n", + " 'ENSG00000188060',\n", + " 'ENSG00000162419',\n", + " 'ENSG00000198492',\n", + " 'ENSG00000159023',\n", + " 'ENSG00000253304',\n", + " 'ENSG00000116350',\n", + " 'ENSG00000116353',\n", + " 'ENSG00000060656',\n", + " 'ENSG00000162511',\n", + " 'ENSG00000162512',\n", + " 'ENSG00000134644',\n", + " 'ENSG00000060688',\n", + " 'ENSG00000121766',\n", + " 'ENSG00000142910',\n", + " 'ENSG00000162517',\n", + " 'ENSG00000084636',\n", + " 'ENSG00000121753',\n", + " 'ENSG00000184007',\n", + " 'ENSG00000121774',\n", + " 'ENSG00000121775',\n", + " 'ENSG00000025800',\n", + " 'ENSG00000084652',\n", + " 'ENSG00000160050',\n", + " 'ENSG00000160051',\n", + " 'ENSG00000160055',\n", + " 'ENSG00000084623',\n", + " 'ENSG00000116478',\n", + " 'ENSG00000175130',\n", + " 'ENSG00000225828',\n", + " 'ENSG00000160058',\n", + " 'ENSG00000160062',\n", + " 'ENSG00000176261',\n", + " 'ENSG00000162521',\n", + " 'ENSG00000162520',\n", + " 'ENSG00000162522',\n", + " 'ENSG00000134684',\n", + " 'ENSG00000116497',\n", + " 'ENSG00000116514',\n", + " 'ENSG00000004455',\n", + " 'ENSG00000142920',\n", + " 'ENSG00000116525',\n", + " 'ENSG00000160094',\n", + " 'ENSG00000134686',\n", + " 'ENSG00000121903',\n", + " 'ENSG00000163866',\n", + " 'ENSG00000187513',\n", + " 'ENSG00000116544',\n", + " 'ENSG00000271741',\n", + " 'ENSG00000163867',\n", + " 'ENSG00000197056',\n", + " 'ENSG00000116560',\n", + " 'ENSG00000146463',\n", + " 'ENSG00000142687',\n", + " 'ENSG00000020129',\n", + " 'ENSG00000126067',\n", + " 'ENSG00000092853',\n", + " 'ENSG00000134698',\n", + " 'ENSG00000092847',\n", + " 'ENSG00000126070',\n", + " 'ENSG00000092850',\n", + " 'ENSG00000116863',\n", + " 'ENSG00000054116',\n", + " 'ENSG00000116871',\n", + " 'ENSG00000054118',\n", + " 'ENSG00000214193',\n", + " 'ENSG00000142694',\n", + " 'ENSG00000196182',\n", + " 'ENSG00000181817',\n", + " 'ENSG00000116885',\n", + " 'ENSG00000116898',\n", + " 'ENSG00000163874',\n", + " 'ENSG00000163875',\n", + " 'ENSG00000163877',\n", + " 'ENSG00000163879',\n", + " 'ENSG00000134697',\n", + " 'ENSG00000134690',\n", + " 'ENSG00000196449',\n", + " 'ENSG00000188786',\n", + " 'ENSG00000204084',\n", + " 'ENSG00000183431',\n", + " 'ENSG00000183386',\n", + " 'ENSG00000183520',\n", + " 'ENSG00000116954',\n", + " 'ENSG00000214114',\n", + " 'ENSG00000174574',\n", + " 'ENSG00000168653',\n", + " 'ENSG00000127603',\n", + " 'ENSG00000090621',\n", + " 'ENSG00000163909',\n", + " 'ENSG00000084072',\n", + " 'ENSG00000043514',\n", + " 'ENSG00000116990',\n", + " 'ENSG00000168389',\n", + " 'ENSG00000131236',\n", + " 'ENSG00000131238',\n", + " 'ENSG00000117000',\n", + " 'ENSG00000084073',\n", + " 'ENSG00000049089',\n", + " 'ENSG00000084070',\n", + " 'ENSG00000187801',\n", + " 'ENSG00000187815',\n", + " 'ENSG00000164002',\n", + " 'ENSG00000117010',\n", + " 'ENSG00000117016',\n", + " 'ENSG00000066136',\n", + " 'ENSG00000117013',\n", + " 'ENSG00000179862',\n", + " 'ENSG00000171793',\n", + " 'ENSG00000010803',\n", + " 'ENSG00000127124',\n", + " 'ENSG00000198815',\n", + " 'ENSG00000066185',\n", + " 'ENSG00000127125',\n", + " 'ENSG00000186409',\n", + " 'ENSG00000171960',\n", + " 'ENSG00000065978',\n", + " 'ENSG00000117385',\n", + " 'ENSG00000177868',\n", + " 'ENSG00000164010',\n", + " 'ENSG00000164011',\n", + " 'ENSG00000117394',\n", + " 'ENSG00000227533',\n", + " 'ENSG00000117395',\n", + " 'ENSG00000066056',\n", + " 'ENSG00000117399',\n", + " 'ENSG00000066322',\n", + " 'ENSG00000159479',\n", + " 'ENSG00000198198',\n", + " 'ENSG00000178922',\n", + " 'ENSG00000142949',\n", + " 'ENSG00000126091',\n", + " 'ENSG00000117408',\n", + " 'ENSG00000132768',\n", + " 'ENSG00000117410',\n", + " 'ENSG00000117411',\n", + " 'ENSG00000159214',\n", + " 'ENSG00000196517',\n", + " 'ENSG00000230615',\n", + " 'ENSG00000178028',\n", + " 'ENSG00000117419',\n", + " 'ENSG00000187147',\n", + " 'ENSG00000126106',\n", + " 'ENSG00000142945',\n", + " 'ENSG00000142937',\n", + " 'ENSG00000142959',\n", + " 'ENSG00000173846',\n", + " 'ENSG00000222009',\n", + " 'ENSG00000117425',\n", + " 'ENSG00000070785',\n", + " 'ENSG00000126107',\n", + " 'ENSG00000126088',\n", + " 'ENSG00000162415',\n", + " 'ENSG00000186603',\n", + " 'ENSG00000132781',\n", + " 'ENSG00000132773',\n", + " 'ENSG00000070759',\n", + " 'ENSG00000132763',\n", + " 'ENSG00000117450',\n", + " 'ENSG00000117448',\n", + " 'ENSG00000132780',\n", + " 'ENSG00000159592',\n", + " 'ENSG00000159596',\n", + " 'ENSG00000197429',\n", + " 'ENSG00000086015',\n", + " 'ENSG00000117472',\n", + " 'ENSG00000085998',\n", + " 'ENSG00000171357',\n", + " 'ENSG00000085999',\n", + " 'ENSG00000132128',\n", + " 'ENSG00000173660',\n", + " 'ENSG00000117481',\n", + " 'ENSG00000117480',\n", + " 'ENSG00000079277',\n", + " 'ENSG00000123472',\n", + " 'ENSG00000159658',\n", + " 'ENSG00000123473',\n", + " 'ENSG00000162368',\n", + " 'ENSG00000132122',\n", + " 'ENSG00000185104',\n", + " 'ENSG00000123080',\n", + " 'ENSG00000123091',\n", + " 'ENSG00000085832',\n", + " 'ENSG00000117859',\n", + " 'ENSG00000078618',\n", + " 'ENSG00000169213',\n", + " 'ENSG00000117862',\n", + " 'ENSG00000134717',\n", + " 'ENSG00000157077',\n", + " 'ENSG00000154222',\n", + " 'ENSG00000085840',\n", + " 'ENSG00000134748',\n", + " 'ENSG00000134744',\n", + " 'ENSG00000116157',\n", + " 'ENSG00000182183',\n", + " 'ENSG00000162377',\n", + " 'ENSG00000162378',\n", + " 'ENSG00000121310',\n", + " 'ENSG00000116171',\n", + " 'ENSG00000157184',\n", + " 'ENSG00000162385',\n", + " 'ENSG00000157193',\n", + " 'ENSG00000058804',\n", + " 'ENSG00000058799',\n", + " 'ENSG00000081870',\n", + " 'ENSG00000116212',\n", + " 'ENSG00000116209',\n", + " 'ENSG00000116205',\n", + " 'ENSG00000116221',\n", + " 'ENSG00000157216',\n", + " 'ENSG00000162390',\n", + " 'ENSG00000243725',\n", + " 'ENSG00000162396',\n", + " 'ENSG00000006555',\n", + " 'ENSG00000116133',\n", + " 'ENSG00000162402',\n", + " 'ENSG00000162407',\n", + " 'ENSG00000184292',\n", + " 'ENSG00000162601',\n", + " 'ENSG00000177606',\n", + " 'ENSG00000172456',\n", + " 'ENSG00000134709',\n", + " 'ENSG00000162599',\n", + " 'ENSG00000162604',\n", + " 'ENSG00000132849',\n", + " 'ENSG00000240563',\n", + " 'ENSG00000132854',\n", + " 'ENSG00000162607',\n", + " 'ENSG00000116641',\n", + " 'ENSG00000125703',\n", + " 'ENSG00000088035',\n", + " 'ENSG00000142856',\n", + " 'ENSG00000203965',\n", + " 'ENSG00000079739',\n", + " 'ENSG00000185483',\n", + " 'ENSG00000158966',\n", + " 'ENSG00000162437',\n", + " 'ENSG00000162434',\n", + " 'ENSG00000162433',\n", + " 'ENSG00000116675',\n", + " 'ENSG00000116678',\n", + " 'ENSG00000184588',\n", + " 'ENSG00000118473',\n", + " 'ENSG00000152760',\n", + " 'ENSG00000152763',\n", + " 'ENSG00000198160',\n", + " 'ENSG00000116704',\n", + " 'ENSG00000142864',\n", + " 'ENSG00000116717',\n", + " 'ENSG00000172380',\n", + " 'ENSG00000116729',\n", + " 'ENSG00000024526',\n", + " 'ENSG00000033122',\n", + " 'ENSG00000066557',\n", + " 'ENSG00000116754',\n", + " 'ENSG00000118454',\n", + " 'ENSG00000197568',\n", + " 'ENSG00000116761',\n", + " 'ENSG00000132485',\n", + " 'ENSG00000172260',\n", + " 'ENSG00000162620',\n", + " 'ENSG00000116791',\n", + " 'ENSG00000162623',\n", + " 'ENSG00000137968',\n", + " 'ENSG00000117054',\n", + " 'ENSG00000137955',\n", + " 'ENSG00000184005',\n", + " 'ENSG00000117069',\n", + " 'ENSG00000142892',\n", + " 'ENSG00000154027',\n", + " 'ENSG00000036549',\n", + " 'ENSG00000077254',\n", + " 'ENSG00000162614',\n", + " 'ENSG00000162613',\n", + " 'ENSG00000162616',\n", + " 'ENSG00000137959',\n", + " 'ENSG00000137965',\n", + " 'ENSG00000117114',\n", + " 'ENSG00000137941',\n", + " 'ENSG00000271576',\n", + " 'ENSG00000142875',\n", + " 'ENSG00000203943',\n", + " 'ENSG00000117133',\n", + " 'ENSG00000174021',\n", + " 'ENSG00000117151',\n", + " 'ENSG00000117155',\n", + " 'ENSG00000097096',\n", + " 'ENSG00000142867',\n", + " 'ENSG00000153904',\n", + " 'ENSG00000142871',\n", + " 'ENSG00000117174',\n", + " 'ENSG00000171502',\n", + " 'ENSG00000122417',\n", + " 'ENSG00000097033',\n", + " 'ENSG00000183291',\n", + " 'ENSG00000153936',\n", + " 'ENSG00000143013',\n", + " 'ENSG00000065243',\n", + " 'ENSG00000137947',\n", + " 'ENSG00000137944',\n", + " 'ENSG00000117226',\n", + " 'ENSG00000117228',\n", + " 'ENSG00000162654',\n", + " 'ENSG00000197147',\n", + " 'ENSG00000272931',\n", + " 'ENSG00000171492',\n", + " 'ENSG00000162664',\n", + " 'ENSG00000122482',\n", + " 'ENSG00000162669',\n", + " 'ENSG00000097046',\n", + " 'ENSG00000069702',\n", + " 'ENSG00000189195',\n", + " 'ENSG00000174842',\n", + " 'ENSG00000122484',\n", + " 'ENSG00000067208',\n", + " 'ENSG00000122406',\n", + " 'ENSG00000154511',\n", + " 'ENSG00000143033',\n", + " 'ENSG00000117500',\n", + " 'ENSG00000122483',\n", + " 'ENSG00000117505',\n", + " 'ENSG00000137942',\n", + " 'ENSG00000137936',\n", + " 'ENSG00000067334',\n", + " 'ENSG00000023909',\n", + " 'ENSG00000137962',\n", + " 'ENSG00000117528',\n", + " 'ENSG00000117525',\n", + " 'ENSG00000117519',\n", + " 'ENSG00000172339',\n", + " 'ENSG00000152078',\n", + " 'ENSG00000117569',\n", + " 'ENSG00000188641',\n", + " 'ENSG00000162627',\n", + " 'ENSG00000117598',\n", + " 'ENSG00000117600',\n", + " 'ENSG00000099260',\n", + " 'ENSG00000156869',\n", + " 'ENSG00000162688',\n", + " 'ENSG00000117620',\n", + " 'ENSG00000156875',\n", + " 'ENSG00000156876',\n", + " 'ENSG00000122435',\n", + " 'ENSG00000137992',\n", + " 'ENSG00000137996',\n", + " 'ENSG00000079335',\n", + " 'ENSG00000162692',\n", + " 'ENSG00000162694',\n", + " 'ENSG00000162695',\n", + " 'ENSG00000117543',\n", + " 'ENSG00000170989',\n", + " 'ENSG00000185946',\n", + " 'ENSG00000240038',\n", + " 'ENSG00000198890',\n", + " 'ENSG00000162631',\n", + " 'ENSG00000134215',\n", + " 'ENSG00000162636',\n", + " 'ENSG00000162639',\n", + " 'ENSG00000134186',\n", + " 'ENSG00000116266',\n", + " 'ENSG00000121957',\n", + " 'ENSG00000085433',\n", + " 'ENSG00000197780',\n", + " 'ENSG00000215717',\n", + " 'ENSG00000116299',\n", + " 'ENSG00000031698',\n", + " 'ENSG00000143126',\n", + " 'ENSG00000134222',\n", + " 'ENSG00000134243',\n", + " 'ENSG00000143106',\n", + " 'ENSG00000162650',\n", + " 'ENSG00000174151',\n", + " 'ENSG00000065135',\n", + " 'ENSG00000116337',\n", + " 'ENSG00000168765',\n", + " 'ENSG00000213366',\n", + " 'ENSG00000134201',\n", + " 'ENSG00000134202',\n", + " 'ENSG00000184371',\n", + " 'ENSG00000168710',\n", + " 'ENSG00000143093',\n", + " 'ENSG00000116396',\n", + " 'ENSG00000162775',\n", + " 'ENSG00000168679',\n", + " 'ENSG00000134248',\n", + " 'ENSG00000143119',\n", + " 'ENSG00000121931',\n", + " 'ENSG00000156171',\n", + " 'ENSG00000134255',\n", + " 'ENSG00000116455',\n", + " 'ENSG00000116459',\n", + " 'ENSG00000116473',\n", + " 'ENSG00000197852',\n", + " 'ENSG00000064703',\n", + " 'ENSG00000143079',\n", + " 'ENSG00000134245',\n", + " 'ENSG00000007341',\n", + " 'ENSG00000116489',\n", + " 'ENSG00000155363',\n", + " 'ENSG00000155366',\n", + " 'ENSG00000238198',\n", + " 'ENSG00000198799',\n", + " 'ENSG00000081026',\n", + " 'ENSG00000116793',\n", + " 'ENSG00000081019',\n", + " 'ENSG00000134262',\n", + " 'ENSG00000118655',\n", + " 'ENSG00000163349',\n", + " 'ENSG00000116774',\n", + " 'ENSG00000134207',\n", + " 'ENSG00000197323',\n", + " 'ENSG00000116752',\n", + " 'ENSG00000175984',\n", + " 'ENSG00000213281',\n", + " 'ENSG00000009307',\n", + " 'ENSG00000052723',\n", + " 'ENSG00000134198',\n", + " 'ENSG00000173218',\n", + " 'ENSG00000163393',\n", + " 'ENSG00000163399',\n", + " 'ENSG00000116815',\n", + " 'ENSG00000143061',\n", + " 'ENSG00000134247',\n", + " 'ENSG00000116830',\n", + " 'ENSG00000134253',\n", + " 'ENSG00000198162',\n", + " 'ENSG00000183508',\n", + " 'ENSG00000196505',\n", + " 'ENSG00000065183',\n", + " 'ENSG00000155761',\n", + " 'ENSG00000116874',\n", + " 'ENSG00000143067',\n", + " 'ENSG00000092621',\n", + " 'ENSG00000134250',\n", + " 'ENSG00000226067',\n", + " 'ENSG00000188610',\n", + " 'ENSG00000232527',\n", + " 'ENSG00000215784',\n", + " 'ENSG00000162825',\n", + " 'ENSG00000117262',\n", + " 'ENSG00000186141',\n", + " 'ENSG00000186364',\n", + " 'ENSG00000131788',\n", + " 'ENSG00000198483',\n", + " 'ENSG00000143127',\n", + " 'ENSG00000131779',\n", + " 'ENSG00000121851',\n", + " 'ENSG00000131791',\n", + " 'ENSG00000131781',\n", + " 'ENSG00000131778',\n", + " 'ENSG00000116128',\n", + " 'ENSG00000162836',\n", + " 'ENSG00000188092',\n", + " 'ENSG00000178104',\n", + " 'ENSG00000203814',\n", + " 'ENSG00000183598',\n", + " 'ENSG00000288825',\n", + " 'ENSG00000184678',\n", + " 'ENSG00000184260',\n", + " 'ENSG00000184270',\n", + " 'ENSG00000178096',\n", + " 'ENSG00000159164',\n", + " 'ENSG00000143368',\n", + " 'ENSG00000014914',\n", + " 'ENSG00000136631',\n", + " 'ENSG00000023902',\n", + " 'ENSG00000143401',\n", + " 'ENSG00000117362',\n", + " 'ENSG00000159208',\n", + " 'ENSG00000117360',\n", + " 'ENSG00000163125',\n", + " 'ENSG00000143374',\n", + " 'ENSG00000143369',\n", + " 'ENSG00000228126',\n", + " 'ENSG00000143382',\n", + " 'ENSG00000143384',\n", + " 'ENSG00000143420',\n", + " 'ENSG00000143457',\n", + " 'ENSG00000163131',\n", + " 'ENSG00000143387',\n", + " 'ENSG00000143437',\n", + " 'ENSG00000143379',\n", + " 'ENSG00000143418',\n", + " 'ENSG00000143409',\n", + " 'ENSG00000143363',\n", + " 'ENSG00000197622',\n", + " 'ENSG00000213190',\n", + " 'ENSG00000143458',\n", + " 'ENSG00000143434',\n", + " 'ENSG00000163154',\n", + " 'ENSG00000163156',\n", + " 'ENSG00000163155',\n", + " 'ENSG00000163159',\n", + " 'ENSG00000143398',\n", + " 'ENSG00000159352',\n", + " 'ENSG00000143373',\n", + " 'ENSG00000143393',\n", + " 'ENSG00000143390',\n", + " 'ENSG00000143416',\n", + " 'ENSG00000159377',\n", + " 'ENSG00000143442',\n", + " 'ENSG00000143375',\n", + " 'ENSG00000143367',\n", + " 'ENSG00000143376',\n", + " 'ENSG00000178796',\n", + " 'ENSG00000143436',\n", + " 'ENSG00000143450',\n", + " 'ENSG00000182134',\n", + " 'ENSG00000159445',\n", + " 'ENSG00000197747',\n", + " 'ENSG00000163191',\n", + " 'ENSG00000197956',\n", + " 'ENSG00000196154',\n", + " 'ENSG00000196754',\n", + " 'ENSG00000188643',\n", + " 'ENSG00000189334',\n", + " 'ENSG00000189171',\n", + " 'ENSG00000160678',\n", + " 'ENSG00000160679',\n", + " 'ENSG00000143553',\n", + " 'ENSG00000143621',\n", + " 'ENSG00000198837',\n", + " 'ENSG00000160741',\n", + " 'ENSG00000143570',\n", + " 'ENSG00000143578',\n", + " 'ENSG00000143543',\n", + " 'ENSG00000272654',\n", + " 'ENSG00000143545',\n", + " 'ENSG00000177954',\n", + " 'ENSG00000143549',\n", + " 'ENSG00000143569',\n", + " 'ENSG00000143575',\n", + " 'ENSG00000143515',\n", + " 'ENSG00000160712',\n", + " 'ENSG00000169291',\n", + " 'ENSG00000160714',\n", + " 'ENSG00000160710',\n", + " 'ENSG00000163344',\n", + " 'ENSG00000270361',\n", + " 'ENSG00000163346',\n", + " 'ENSG00000163348',\n", + " 'ENSG00000160691',\n", + " 'ENSG00000160688',\n", + " 'ENSG00000160685',\n", + " 'ENSG00000143537',\n", + " 'ENSG00000143590',\n", + " 'ENSG00000169242',\n", + " 'ENSG00000169241',\n", + " 'ENSG00000179085',\n", + " 'ENSG00000163463',\n", + " 'ENSG00000163462',\n", + " 'ENSG00000185499',\n", + " 'ENSG00000169231',\n", + " 'ENSG00000173171',\n", + " 'ENSG00000160752',\n", + " 'ENSG00000160753',\n", + " 'ENSG00000116539',\n", + " 'ENSG00000125459',\n", + " 'ENSG00000163374',\n", + " 'ENSG00000132676',\n", + " 'ENSG00000116580',\n", + " 'ENSG00000132718',\n", + " 'ENSG00000143622',\n", + " 'ENSG00000132680',\n", + " 'ENSG00000116584',\n", + " 'ENSG00000163479',\n", + " 'ENSG00000160803',\n", + " 'ENSG00000116586',\n", + " 'ENSG00000132698',\n", + " 'ENSG00000254726',\n", + " 'ENSG00000160789',\n", + " 'ENSG00000196189',\n", + " 'ENSG00000160785',\n", + " 'ENSG00000260238',\n", + " 'ENSG00000160781',\n", + " 'ENSG00000198952',\n", + " 'ENSG00000163472',\n", + " 'ENSG00000198715',\n", + " 'ENSG00000163468',\n", + " 'ENSG00000163467',\n", + " 'ENSG00000116604',\n", + " 'ENSG00000183856',\n", + " 'ENSG00000163382',\n", + " 'ENSG00000160818',\n", + " 'ENSG00000132688',\n", + " 'ENSG00000143320',\n", + " 'ENSG00000143319',\n", + " 'ENSG00000143303',\n", + " 'ENSG00000143314',\n", + " 'ENSG00000143321',\n", + " 'ENSG00000143294',\n", + " 'ENSG00000187800',\n", + " 'ENSG00000132694',\n", + " 'ENSG00000253831',\n", + " 'ENSG00000117036',\n", + " 'ENSG00000183853',\n", + " 'ENSG00000163565',\n", + " 'ENSG00000158716',\n", + " 'ENSG00000158710',\n", + " 'ENSG00000085552',\n", + " 'ENSG00000162729',\n", + " 'ENSG00000018625',\n", + " 'ENSG00000162734',\n", + " 'ENSG00000132716',\n", + " 'ENSG00000162735',\n", + " 'ENSG00000122218',\n", + " 'ENSG00000162736',\n", + " 'ENSG00000162738',\n", + " 'ENSG00000066294',\n", + " 'ENSG00000158769',\n", + " 'ENSG00000215845',\n", + " 'ENSG00000158773',\n", + " 'ENSG00000162755',\n", + " 'ENSG00000143256',\n", + " 'ENSG00000158793',\n", + " 'ENSG00000158796',\n", + " 'ENSG00000143222',\n", + " 'ENSG00000143258',\n", + " 'ENSG00000143224',\n", + " 'ENSG00000158850',\n", + " 'ENSG00000158859',\n", + " 'ENSG00000158864',\n", + " 'ENSG00000158869',\n", + " 'ENSG00000158882',\n", + " 'ENSG00000158887',\n", + " 'ENSG00000143252',\n", + " 'ENSG00000143226',\n", + " 'ENSG00000173110',\n", + " 'ENSG00000162746',\n", + " 'ENSG00000081721',\n", + " 'ENSG00000118217',\n", + " 'ENSG00000198929',\n", + " 'ENSG00000152332',\n", + " 'ENSG00000117143',\n", + " 'ENSG00000162733',\n", + " 'ENSG00000132196',\n", + " 'ENSG00000143228',\n", + " 'ENSG00000185630',\n", + " 'ENSG00000143171',\n", + " 'ENSG00000143198',\n", + " 'ENSG00000143149',\n", + " 'ENSG00000143183',\n", + " 'ENSG00000143179',\n", + " 'ENSG00000143157',\n", + " 'ENSG00000152382',\n", + " 'ENSG00000143190',\n", + " 'ENSG00000143162',\n", + " 'ENSG00000198771',\n", + " 'ENSG00000197965',\n", + " 'ENSG00000143158',\n", + " 'ENSG00000143164',\n", + " 'ENSG00000143147',\n", + " 'ENSG00000143155',\n", + " 'ENSG00000213064',\n", + " 'ENSG00000143178',\n", + " 'ENSG00000143153',\n", + " 'ENSG00000143156',\n", + " 'ENSG00000117475',\n", + " 'ENSG00000117479',\n", + " 'ENSG00000171806',\n", + " 'ENSG00000000457',\n", + " 'ENSG00000075945',\n", + " 'ENSG00000120370',\n", + " 'ENSG00000116132',\n", + " 'ENSG00000076258',\n", + " 'ENSG00000117523',\n", + " 'ENSG00000117533',\n", + " 'ENSG00000010165',\n", + " 'ENSG00000197959',\n", + " 'ENSG00000135845',\n", + " 'ENSG00000094975',\n", + " 'ENSG00000117592',\n", + " 'ENSG00000076321',\n", + " 'ENSG00000120334',\n", + " 'ENSG00000117593',\n", + " 'ENSG00000185278',\n", + " 'ENSG00000135870',\n", + " 'ENSG00000227373',\n", + " 'ENSG00000152061',\n", + " 'ENSG00000116161',\n", + " 'ENSG00000120333',\n", + " 'ENSG00000143207',\n", + " 'ENSG00000075391',\n", + " 'ENSG00000116191',\n", + " 'ENSG00000116194',\n", + " 'ENSG00000116199',\n", + " 'ENSG00000186283',\n", + " 'ENSG00000143322',\n", + " 'ENSG00000057252',\n", + " 'ENSG00000169905',\n", + " 'ENSG00000272906',\n", + " 'ENSG00000143337',\n", + " 'ENSG00000135837',\n", + " 'ENSG00000116260',\n", + " 'ENSG00000230124',\n", + " 'ENSG00000143324',\n", + " 'ENSG00000135835',\n", + " 'ENSG00000135823',\n", + " 'ENSG00000162783',\n", + " 'ENSG00000135821',\n", + " 'ENSG00000135828',\n", + " 'ENSG00000143333',\n", + " 'ENSG00000135838',\n", + " 'ENSG00000135829',\n", + " 'ENSG00000135862',\n", + " 'ENSG00000058085',\n", + " 'ENSG00000116698',\n", + " 'ENSG00000116701',\n", + " 'ENSG00000162704',\n", + " 'ENSG00000143344',\n", + " 'ENSG00000198756',\n", + " 'ENSG00000198860',\n", + " 'ENSG00000271387',\n", + " 'ENSG00000116406',\n", + " 'ENSG00000135842',\n", + " 'ENSG00000121481',\n", + " 'ENSG00000121486',\n", + " 'ENSG00000116668',\n", + " 'ENSG00000116679',\n", + " 'ENSG00000143341',\n", + " 'ENSG00000047410',\n", + " 'ENSG00000116711',\n", + " 'ENSG00000162670',\n", + " 'ENSG00000116741',\n", + " 'ENSG00000116750',\n", + " 'ENSG00000116747',\n", + " 'ENSG00000023572',\n", + " 'ENSG00000134371',\n", + " 'ENSG00000162630',\n", + " 'ENSG00000162687',\n", + " 'ENSG00000000971',\n", + " 'ENSG00000066279',\n", + " 'ENSG00000177888',\n", + " 'ENSG00000213047',\n", + " 'ENSG00000151414',\n", + " 'ENSG00000229989',\n", + " 'ENSG00000162702',\n", + " 'ENSG00000118193',\n", + " 'ENSG00000118197',\n", + " 'ENSG00000260088',\n", + " 'ENSG00000118200',\n", + " 'ENSG00000116852',\n", + " 'ENSG00000116857',\n", + " 'ENSG00000174307',\n", + " 'ENSG00000159176',\n", + " 'ENSG00000134369',\n", + " 'ENSG00000198700',\n", + " 'ENSG00000198892',\n", + " 'ENSG00000163431',\n", + " 'ENSG00000134375',\n", + " 'ENSG00000176393',\n", + " 'ENSG00000163435',\n", + " 'ENSG00000143862',\n", + " 'ENSG00000077152',\n", + " 'ENSG00000077157',\n", + " 'ENSG00000117139',\n", + " 'ENSG00000183155',\n", + " 'ENSG00000117153',\n", + " 'ENSG00000159346',\n", + " 'ENSG00000159348',\n", + " 'ENSG00000163444',\n", + " 'ENSG00000143847',\n", + " 'ENSG00000159388',\n", + " 'ENSG00000122176',\n", + " 'ENSG00000188783',\n", + " 'ENSG00000058668',\n", + " ...]" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.get_var_names()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/canergen/Documents/scvi-tools/src/scvi/model/base/_archesmixin.py:118: UserWarning: `var_names` for the loaded `adata` does not match those of the `adata` used to train the model. For valid results, the former should match the latter.\n", + " _validate_var_names(adata, var_names)\n", + "/home/canergen/Documents/scvi-tools/src/scvi/model/base/_archesmixin.py:123: UserWarning: `var_names` for the loaded `adata` does not match those of the `adata` used to train the model. For valid results, the former should match the latter.\n", + " _validate_var_names(adata, var_names)\n" + ] + }, + { + "ename": "AttributeError", + "evalue": "'DataFrame' object has no attribute 'soma_joinid'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_1006776/561006111.py\u001b[0m in \u001b[0;36m?\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_query_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0madata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mad\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreference_model\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/Documents/scvi-tools/src/scvi/model/base/_archesmixin.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(cls, adata, reference_model, registry, inplace_subset_query_vars, accelerator, device, unfrozen, freeze_dropout, freeze_expression, freeze_decoder_first_layer, freeze_batchnorm_encoder, freeze_batchnorm_decoder, freeze_classifier)\u001b[0m\n\u001b[1;32m 132\u001b[0m \u001b[0;34m\"Cannot load the original setup.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 133\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 135\u001b[0m \u001b[0msetup_method\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mregistry\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0m_SETUP_METHOD_NAME\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 136\u001b[0;31m setup_method(\n\u001b[0m\u001b[1;32m 137\u001b[0m \u001b[0madata\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[0msource_registry\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mregistry\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 139\u001b[0m \u001b[0mextend_categories\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Documents/scvi-tools/src/scvi/model/_scvi.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(cls, datamodule, source_registry, layer, batch_key, labels_key, size_factor_key, categorical_covariate_keys, continuous_covariate_keys, **kwargs)\u001b[0m\n\u001b[1;32m 286\u001b[0m \u001b[0;31m# it means we init the custom dataloder model with anndata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 287\u001b[0m categorical_mapping = source_registry[\"field_registries\"][\"batch\"][\"state_registry\"][\n\u001b[1;32m 288\u001b[0m \u001b[0;34m\"categorical_mapping\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 289\u001b[0m \u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 290\u001b[0;31m \u001b[0mcolumn_names\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdatamodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvar\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msoma_joinid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 291\u001b[0m \u001b[0mn_batch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msource_registry\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"field_registries\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"batch\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"summary_stats\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"n_batch\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 293\u001b[0m datamodule.registry = {\n", + "\u001b[0;32m~/.local/share/hatch/env/virtual/scvi-tools/SQ6yRUPo/scvi-tools/lib/python3.12/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 6295\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_accessors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6296\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_can_hold_identifiers_and_holds_name\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6297\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6298\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 6299\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'soma_joinid'" + ] + } + ], + "source": [ + "model2 = model.load_query_data(adata=ad, reference_model=model)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34mINFO \u001b[0m Input AnnData not setup with scvi-tools. attempting to transfer AnnData setup \n" + ] + }, + { + "ename": "AttributeError", + "evalue": "'NoneType' object has no attribute 'transfer_fields'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[34], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m b \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_reconstruction_error\u001b[49m\u001b[43m(\u001b[49m\u001b[43madata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mad\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.local/share/hatch/env/virtual/scvi-tools/SQ6yRUPo/scvi-tools/lib/python3.12/site-packages/torch/utils/_contextlib.py:116\u001b[0m, in \u001b[0;36mcontext_decorator.
\n", + " | Celltypes | \n", + "Loc_true | \n", + "suspension_type | \n", + "scsn | \n", + "donor_id | \n", + "Gender | \n", + "Sample | \n", + "ID | \n", + "assay_ontology_term_id | \n", + "Study | \n", + "n_genes | \n", + "n_genes_by_counts | \n", + "total_counts | \n", + "total_counts_mt | \n", + "pct_counts_mt | \n", + "PoolDon | \n", + "DonorPool | \n", + "Protocol_plot | \n", + "scDonor_snBatch | \n", + "is_primary_data | \n", + "disease_ontology_term_id | \n", + "organism_ontology_term_id | \n", + "donor_id_2 | \n", + "sex_ontology_term_id | \n", + "self_reported_ethnicity_ontology_term_id | \n", + "Age range | \n", + "Smoking status | \n", + "Years smoking | \n", + "BMI range | \n", + "development_stage_ontology_term_id | \n", + "Location_long | \n", + "Cell_fraction | \n", + "tissue_ontology_term_id | \n", + "cell_type_ontology_term_id | \n", + "tissue_type | \n", + "cell_type | \n", + "assay | \n", + "disease | \n", + "organism | \n", + "sex | \n", + "tissue | \n", + "self_reported_ethnicity | \n", + "development_stage | \n", + "observation_joinid | \n", + "nCount_RNA | \n", + "nFeature_RNA | \n", + "smoking | \n", + "packyears | \n", + "percent.mito | \n", + "batch | \n", + "dissection | \n", + "chemistry | \n", + "percent_mito | \n", + "n_counts | \n", + "leiden | \n", + "phase | \n", + "S_score | \n", + "G2M_score | \n", + "new_celltype | \n", + "score | \n", + "log1p_n_genes_by_counts | \n", + "log1p_total_counts | \n", + "mito_frac | \n", + "RBP_frac | \n", + "treatment | \n", + "histo | \n", + "procedure | \n", + "clusters | \n", + "author_cell_type | \n", + "clusters_fine | \n", + "HTAN_Biospecimen_ID | \n", + "HTAN_Participant_ID | \n", + "seurat_clusters | \n", + "celltype | \n", + "lineage | \n", + "nGene | \n", + "nReads | \n", + "plate.barcode | \n", + "cell.id | \n", + "region | \n", + "label | \n", + "sorter | \n", + "sort.location | \n", + "sample | \n", + "location | \n", + "percent.ercc | \n", + "percent.ribo | \n", + "gating | \n", + "free_annotation | \n", + "Number of splices: Total | \n", + "Number of splices: Annotated (sjdb) | \n", + "Number of splices: GT-AG | \n", + "Number of splices: GC-AG | \n", + "Number of splices: AT-AC | \n", + "Number of splices: Non-canonical | \n", + "Mapping speed, Million of reads per hour | \n", + "Average input read length | \n", + "compartment | \n", + "artifact_uid | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
WTDAtest7887999-GGGAGATGTGAGTGAC | \n", + "B_memory | \n", + "d_LowLeftPar | \n", + "cell | \n", + "cells | \n", + "A32 | \n", + "M | \n", + "WTDAtest7887999 | \n", + "A32-LNG-1-SC-45N-1 | \n", + "EFO:0009899 | \n", + "dissociation | \n", + "655.0 | \n", + "655.0 | \n", + "1676.563354 | \n", + "50.247673 | \n", + "2.997064 | \n", + "nan | \n", + "nan | \n", + "LibCD45neg_TrypLibUndigest | \n", + "A32_cells | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "411C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "25-30 | \n", + "current | \n", + "15.0 | \n", + "20-24 | \n", + "HsapDv:0000123 | \n", + "Lower Left Lobe | \n", + "CD45neg and Liberase undigested | \n", + "UBERON:0008953 | \n", + "CL:0000787 | \n", + "tissue | \n", + "memory B cell | \n", + "10x 3' v2 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "lower lobe of left lung | \n", + "European | \n", + "29-year-old human stage | \n", + "sj#d$bsNrL | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "qOw3M7KtZ0QjEEtUcSy9 | \n", + "
WTDAtest7888001-CTAGAGTCAAGTTCTG | \n", + "B_plasma_IgG | \n", + "a_Trachea | \n", + "cell | \n", + "cells | \n", + "A32 | \n", + "M | \n", + "WTDAtest7888001 | \n", + "A32-LNG-2-SC-45N-1 | \n", + "EFO:0009899 | \n", + "dissociation | \n", + "1052.0 | \n", + "1052.0 | \n", + "14591.662109 | \n", + "1.973254 | \n", + "0.013523 | \n", + "nan | \n", + "nan | \n", + "LibCD45neg_TrypLibUndigest | \n", + "A32_cells | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "411C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "25-30 | \n", + "current | \n", + "15.0 | \n", + "20-24 | \n", + "HsapDv:0000123 | \n", + "Trachea | \n", + "CD45neg and Liberase undigested | \n", + "UBERON:0003126 | \n", + "CL:0000985 | \n", + "tissue | \n", + "IgG plasma cell | \n", + "10x 3' v2 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "trachea | \n", + "European | \n", + "29-year-old human stage | \n", + "(xklu`s@kK | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "qOw3M7KtZ0QjEEtUcSy9 | \n", + "
WTDAtest7888001-TACTTGTAGGACGAAA | \n", + "B_plasma_IgG | \n", + "a_Trachea | \n", + "cell | \n", + "cells | \n", + "A32 | \n", + "M | \n", + "WTDAtest7888001 | \n", + "A32-LNG-2-SC-45N-1 | \n", + "EFO:0009899 | \n", + "dissociation | \n", + "1346.0 | \n", + "1346.0 | \n", + "10357.476562 | \n", + "10.057343 | \n", + "0.097102 | \n", + "nan | \n", + "nan | \n", + "LibCD45neg_TrypLibUndigest | \n", + "A32_cells | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "411C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "25-30 | \n", + "current | \n", + "15.0 | \n", + "20-24 | \n", + "HsapDv:0000123 | \n", + "Trachea | \n", + "CD45neg and Liberase undigested | \n", + "UBERON:0003126 | \n", + "CL:0000985 | \n", + "tissue | \n", + "IgG plasma cell | \n", + "10x 3' v2 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "trachea | \n", + "European | \n", + "29-year-old human stage | \n", + "PyS2QZSS6l | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "qOw3M7KtZ0QjEEtUcSy9 | \n", + "
WTDAtest7888002-AAACGGGCAACCGCCA | \n", + "B_plasma_IgA | \n", + "a_Trachea | \n", + "cell | \n", + "cells | \n", + "A32 | \n", + "M | \n", + "WTDAtest7888002 | \n", + "A32-LNG-2-SC-45P-1 | \n", + "EFO:0009899 | \n", + "dissociation | \n", + "1612.0 | \n", + "1612.0 | \n", + "15639.706055 | \n", + "39.134060 | \n", + "0.250222 | \n", + "nan | \n", + "nan | \n", + "LibCD45pos | \n", + "A32_cells | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "411C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "25-30 | \n", + "current | \n", + "15.0 | \n", + "20-24 | \n", + "HsapDv:0000123 | \n", + "Trachea | \n", + "CD45pos | \n", + "UBERON:0003126 | \n", + "CL:0000987 | \n", + "tissue | \n", + "IgA plasma cell | \n", + "10x 3' v2 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "trachea | \n", + "European | \n", + "29-year-old human stage | \n", + "B!$?ygWp`e | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "qOw3M7KtZ0QjEEtUcSy9 | \n", + "
WTDAtest7888002-CACACCTAGTGTACCT | \n", + "B_plasma_IgA | \n", + "a_Trachea | \n", + "cell | \n", + "cells | \n", + "A32 | \n", + "M | \n", + "WTDAtest7888002 | \n", + "A32-LNG-2-SC-45P-1 | \n", + "EFO:0009899 | \n", + "dissociation | \n", + "1360.0 | \n", + "1360.0 | \n", + "8899.969727 | \n", + "97.390213 | \n", + "1.094276 | \n", + "nan | \n", + "nan | \n", + "LibCD45pos | \n", + "A32_cells | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "411C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "25-30 | \n", + "current | \n", + "15.0 | \n", + "20-24 | \n", + "HsapDv:0000123 | \n", + "Trachea | \n", + "CD45pos | \n", + "UBERON:0003126 | \n", + "CL:0000987 | \n", + "tissue | \n", + "IgA plasma cell | \n", + "10x 3' v2 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "trachea | \n", + "European | \n", + "29-year-old human stage | \n", + "5%Alz6_H+> | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "qOw3M7KtZ0QjEEtUcSy9 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
WSSS_A_LNG8757929-TTTGGTTCACAAGCAG | \n", + "SMG_Mucous | \n", + "a_Trachea | \n", + "nucleus | \n", + "nuclei | \n", + "A42 | \n", + "nan | \n", + "WSSS_A_LNG8757929 | \n", + "WSSS_A_LNG8757929 | \n", + "EFO:0009922 | \n", + "nan | \n", + "3025.0 | \n", + "3025.0 | \n", + "7233.732910 | \n", + "0.000000 | \n", + "0.000000 | \n", + "A42 | \n", + "A42none | \n", + "nuclei | \n", + "A42_nuclei | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "456C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "60-64 | \n", + "unknown | \n", + "NaN | \n", + "30-34 | \n", + "HsapDv:0000241 | \n", + "Bronchi 2-3 divison without surrounding parenc... | \n", + "All cells | \n", + "UBERON:0002185 | \n", + "CL:1000272 | \n", + "tissue | \n", + "lung secretory cell | \n", + "10x 3' v3 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "bronchus | \n", + "European | \n", + "seventh decade human stage | \n", + "{>Uy5Gwpj3 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "5VCheRCxgdRWtDnBNVQC | \n", + "
WSSS_A_LNG8757929-TTTGGTTCAGCTTCGG | \n", + "SMG_Mucous | \n", + "a_Trachea | \n", + "nucleus | \n", + "nuclei | \n", + "A42 | \n", + "nan | \n", + "WSSS_A_LNG8757929 | \n", + "WSSS_A_LNG8757929 | \n", + "EFO:0009922 | \n", + "nan | \n", + "2131.0 | \n", + "2131.0 | \n", + "4634.926270 | \n", + "0.000000 | \n", + "0.000000 | \n", + "A42 | \n", + "A42none | \n", + "nuclei | \n", + "A42_nuclei | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "456C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "60-64 | \n", + "unknown | \n", + "NaN | \n", + "30-34 | \n", + "HsapDv:0000241 | \n", + "Bronchi 2-3 divison without surrounding parenc... | \n", + "All cells | \n", + "UBERON:0002185 | \n", + "CL:1000272 | \n", + "tissue | \n", + "lung secretory cell | \n", + "10x 3' v3 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "bronchus | \n", + "European | \n", + "seventh decade human stage | \n", + "2Ifg`5A3~k | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "5VCheRCxgdRWtDnBNVQC | \n", + "
WSSS_A_LNG8757929-TTTGGTTGTGCCCTTT | \n", + "SMG_Mucous | \n", + "a_Trachea | \n", + "nucleus | \n", + "nuclei | \n", + "A42 | \n", + "nan | \n", + "WSSS_A_LNG8757929 | \n", + "WSSS_A_LNG8757929 | \n", + "EFO:0009922 | \n", + "nan | \n", + "2723.0 | \n", + "2723.0 | \n", + "7334.214355 | \n", + "0.000000 | \n", + "0.000000 | \n", + "A42 | \n", + "A42none | \n", + "nuclei | \n", + "A42_nuclei | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "456C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "60-64 | \n", + "unknown | \n", + "NaN | \n", + "30-34 | \n", + "HsapDv:0000241 | \n", + "Bronchi 2-3 divison without surrounding parenc... | \n", + "All cells | \n", + "UBERON:0002185 | \n", + "CL:1000272 | \n", + "tissue | \n", + "lung secretory cell | \n", + "10x 3' v3 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "bronchus | \n", + "European | \n", + "seventh decade human stage | \n", + "kmxB0`Q`cQ | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "5VCheRCxgdRWtDnBNVQC | \n", + "
WSSS_A_LNG8757929-TTTGGTTTCAAGAGTA | \n", + "Basal | \n", + "a_Trachea | \n", + "nucleus | \n", + "nuclei | \n", + "A42 | \n", + "nan | \n", + "WSSS_A_LNG8757929 | \n", + "WSSS_A_LNG8757929 | \n", + "EFO:0009922 | \n", + "nan | \n", + "500.0 | \n", + "500.0 | \n", + "626.389465 | \n", + "0.000000 | \n", + "0.000000 | \n", + "A42 | \n", + "A42none | \n", + "nuclei | \n", + "A42_nuclei | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "456C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "60-64 | \n", + "unknown | \n", + "NaN | \n", + "30-34 | \n", + "HsapDv:0000241 | \n", + "Bronchi 2-3 divison without surrounding parenc... | \n", + "All cells | \n", + "UBERON:0002185 | \n", + "CL:0000646 | \n", + "tissue | \n", + "basal cell | \n", + "10x 3' v3 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "bronchus | \n", + "European | \n", + "seventh decade human stage | \n", + "R_svVa3weW | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "5VCheRCxgdRWtDnBNVQC | \n", + "
WSSS_A_LNG8757929-TTTGTTGAGCGAGTCA | \n", + "SMG_Mucous | \n", + "a_Trachea | \n", + "nucleus | \n", + "nuclei | \n", + "A42 | \n", + "nan | \n", + "WSSS_A_LNG8757929 | \n", + "WSSS_A_LNG8757929 | \n", + "EFO:0009922 | \n", + "nan | \n", + "2576.0 | \n", + "2576.0 | \n", + "6651.600586 | \n", + "0.000000 | \n", + "0.000000 | \n", + "A42 | \n", + "A42none | \n", + "nuclei | \n", + "A42_nuclei | \n", + "False | \n", + "PATO:0000461 | \n", + "NCBITaxon:9606 | \n", + "456C | \n", + "PATO:0000384 | \n", + "HANCESTRO:0005 | \n", + "60-64 | \n", + "unknown | \n", + "NaN | \n", + "30-34 | \n", + "HsapDv:0000241 | \n", + "Bronchi 2-3 divison without surrounding parenc... | \n", + "All cells | \n", + "UBERON:0002185 | \n", + "CL:1000272 | \n", + "tissue | \n", + "lung secretory cell | \n", + "10x 3' v3 | \n", + "normal | \n", + "Homo sapiens | \n", + "male | \n", + "bronchus | \n", + "European | \n", + "seventh decade human stage | \n", + "qEapk053ET | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "5VCheRCxgdRWtDnBNVQC | \n", + "
343281 rows × 99 columns
\n", + "