diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..830bd67 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,3 @@ +# Changelog +## 1.2.5 +* Allow `bean run .. tiling` for untranslated `--allele-df-key`. diff --git a/README.md b/README.md index ae15dca..14ee4d8 100755 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ Python package `bean` supports multiple data wrangling functionalities for `Repo * Full pipeline takes 90.1s in GitHub Action for toy dataset of 2 replicates and 30 guides. ## Contributing -If you have questions or feature request, please open an issue. Please feel free to send a pull request. +See [CHANGELOG](CHANGELOG.md) for recent updates. If you have questions or a feature request, please open an issue. Please feel free to send a pull request. ## Citation If you have used BEAN for your analysis, please cite: diff --git a/bean/preprocessing/data_class.py b/bean/preprocessing/data_class.py index ee101b1..bcc92da 100755 --- a/bean/preprocessing/data_class.py +++ b/bean/preprocessing/data_class.py @@ -581,20 +581,20 @@ def _set_uid_to_row(row): allele_counts_selected[allele_col].map(lambda a: "!" in str(a)).any() ), "uid not assinged." 
guide_to_allele, reindexed_df = self.reindex_allele_df( - allele_counts_selected + allele_counts_selected, allele_col ) # TODO: Fix this for combining different sorting scheme self.n_max_alleles = ( reindexed_df.index.get_level_values("allele_id_for_guide").max() + 1 ) # include no edit allele if edit_index is None: - self.edit_index = get_edit_to_index_dict(guide_to_allele.aa_allele) + self.edit_index = get_edit_to_index_dict(guide_to_allele[allele_col]) else: self.edit_index = edit_index self.n_edits = len(self.edit_index.keys()) self.allele_to_edit = self.get_allele_to_edit_tensor( - self.screen_control, self.edit_index, guide_to_allele + self.screen_control, self.edit_index, guide_to_allele, allele_col ) assert self.allele_to_edit.shape == ( self.n_guides, @@ -627,6 +627,7 @@ def get_allele_to_edit_tensor( screen, edits_to_index: Dict[str, int], guide_allele_id_to_allele_df: pd.DataFrame, + allele_col="aa_allele", ) -> torch.Tensor: """ Convert (guide, allele_id_for_guide) -> allele DataFrame into the tensor with shape (n_guides, n_max_alleles_per_guide, n_edits) tensor. @@ -638,11 +639,14 @@ def get_allele_to_edit_tensor( Returns allele_edit_assignment: Binary tensor of shape (n_guides, n_max_alleles_per_guide, n_edits. allele_edit_assignment(i, j, k) is 1 if jth allele of ith guide has kth edit. 
""" - guide_allele_id_to_allele_df["edits"] = ( - guide_allele_id_to_allele_df.aa_allele.map( - lambda a: list(a.aa_allele.edits) + list(a.nt_allele.edits) - ) - ) + if allele_col == "aa_allele": + guide_allele_id_to_allele_df["edits"] = guide_allele_id_to_allele_df[ + allele_col + ].map(lambda a: list(a.aa_allele.edits) + list(a.nt_allele.edits)) + else: + guide_allele_id_to_allele_df["edits"] = guide_allele_id_to_allele_df[ + allele_col + ].map(lambda a: list(a.edits)) guide_allele_id_to_allele_df = guide_allele_id_to_allele_df.reset_index() guide_allele_id_to_allele_df["edit_idx"] = ( guide_allele_id_to_allele_df.edits.map( @@ -663,7 +667,7 @@ def get_allele_to_edit_tensor( allele_edit_assignment[i, j, guide_allele_id_to_edit_df.iloc[i, j]] = 1 return allele_edit_assignment - def reindex_allele_df(self, alleles_df): + def reindex_allele_df(self, alleles_df, allele_col): """ Input: Dataframe of (guide, allele) -> (per sample count) Output: @@ -676,7 +680,7 @@ def reindex_allele_df(self, alleles_df): global_allele_id: global unique id for each (guide, allele) pair. 
""" guide_to_allele = dict( - list(alleles_df[["guide", "aa_allele"]].groupby("guide").aa_allele) + list(alleles_df[["guide", allele_col]].groupby("guide")[allele_col]) ) dfs = [] for k, s in guide_to_allele.items(): @@ -690,14 +694,12 @@ def reindex_allele_df(self, alleles_df): guide_to_allele_tbl = pd.concat(dfs) - alleles_df = pd.merge( - alleles_df, guide_to_allele_tbl, on=["aa_allele", "guide"] - ) + alleles_df = pd.merge(alleles_df, guide_to_allele_tbl, on=[allele_col, "guide"]) reindexed_df = alleles_df.reset_index().set_index( ["guide", "allele_id_for_guide"] ) - guide_allele_id_to_allele = reindexed_df[["index", "aa_allele"]] - reindexed_allele_df = reindexed_df.drop(["aa_allele", "index"], axis=1) + guide_allele_id_to_allele = reindexed_df[["index", allele_col]] + reindexed_allele_df = reindexed_df.drop([allele_col, "index"], axis=1) return (guide_allele_id_to_allele, reindexed_allele_df) def transform_allele(self, adata, reindexed_df): diff --git a/bean/preprocessing/utils.py b/bean/preprocessing/utils.py index 4e3d290..46146c5 100755 --- a/bean/preprocessing/utils.py +++ b/bean/preprocessing/utils.py @@ -153,9 +153,12 @@ def get_edit_to_index_dict(cnalleles: pd.Series) -> Dict[str, int]: Arguments cnalleles -- pd.Series object of CodingNoncodingAllele objects. 
""" - edit_lists = cnalleles.map( - lambda a: list(a.aa_allele.edits) + list(a.nt_allele.edits) - ) + try: + edit_lists = cnalleles.map( + lambda a: list(a.aa_allele.edits) + list(a.nt_allele.edits) + ) + except AttributeError: + edit_lists = cnalleles.map(lambda a: list(a.edits)) edits = pd.Series( pd.Series( [e.get_abs_edit() for l in edit_lists.tolist() for e in l], dtype="object" diff --git a/setup.py b/setup.py index 5b83055..9e505fa 100755 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setup( name="crispr-bean", - version="1.2.4", + version="1.2.5", python_requires=">=3.8.0", author="Jayoung Ryu", author_email="jayoung_ryu@g.harvard.edu", diff --git a/tests/test_run.py b/tests/test_run.py index ca21f03..7f60b85 100755 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -161,7 +161,7 @@ def test_run_tiling_with_negctrl_uniform(): # Add fit_negctrl examples -@pytest.mark.order(417) +@pytest.mark.order(427) def test_survival_run_variant_noacc(): cmd = "bean run survival variant tests/data/survival_var_mini_screen_masked.h5ad -o tests/test_res/var/ --n-iter 10 --control-condition=D7" try: @@ -174,7 +174,7 @@ def test_survival_run_variant_noacc(): raise exc -@pytest.mark.order(418) +@pytest.mark.order(428) def test_survival_run_variant_wo_negctrl_uniform(): cmd = "bean run survival variant tests/data/survival_var_mini_screen_masked.h5ad -o tests/test_res/var/ --uniform-edit --n-iter 10 --control-condition=D7" try: @@ -187,7 +187,7 @@ def test_survival_run_variant_wo_negctrl_uniform(): raise exc -@pytest.mark.order(420) +@pytest.mark.order(429) def test_survival_run_variant_noacc_negctrl(): cmd = "bean run survival variant tests/data/survival_var_mini_screen_masked.h5ad -o tests/test_res/var/ --fit-negctrl --n-iter 10 --control-condition=D7" try: @@ -200,7 +200,7 @@ def test_survival_run_variant_noacc_negctrl(): raise exc -@pytest.mark.order(421) +@pytest.mark.order(430) def test_survival_run_variant_uniform_negctrl(): cmd = "bean run survival variant 
tests/data/survival_var_mini_screen_masked.h5ad -o tests/test_res/var/ --uniform-edit --fit-negctrl --n-iter 10 --control-condition=D7" try: @@ -211,3 +211,16 @@ def test_survival_run_variant_uniform_negctrl(): ) except subprocess.CalledProcessError as exc: raise exc + + +@pytest.mark.order(431) +def test_run_tiling_no_translation(): + cmd = "bean run sorting tiling tests/data/tiling_mini_screen_annotated.h5ad -o tests/test_res/tiling/ --control-guide-tag None --repguide-mask None --n-iter 10 --allele-df-key allele_counts" + try: + subprocess.check_output( + cmd, + shell=True, + universal_newlines=True, + ) + except subprocess.CalledProcessError as exc: + raise exc