Skip to content

Commit

Permalink
Merge pull request #32 from pinellolab/dev
Browse files Browse the repository at this point in the history
Allow bean run tiling for untranslated --allele-df-key
  • Loading branch information
jykr authored May 7, 2024
2 parents 4ecbddd + 9a6f341 commit 5738498
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 24 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Changelog
## 1.2.5
* Allow `bean run .. tiling` for untranslated `--allele-df-key`.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ Python package `bean` supports multiple data wrangling functionalities for `Repo
* Full pipeline takes 90.1s in GitHub Action for toy dataset of 2 replicates and 30 guides.

## Contributing
If you have questions or feature request, please open an issue. Please feel free to send a pull request.
See [CHANGELOG](CHANGELOG.md) for recent updates. If you have questions or feature requests, please open an issue. Please feel free to send a pull request.

## Citation
If you have used BEAN for your analysis, please cite:
Expand Down
32 changes: 17 additions & 15 deletions bean/preprocessing/data_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,20 +581,20 @@ def _set_uid_to_row(row):
allele_counts_selected[allele_col].map(lambda a: "!" in str(a)).any()
), "uid not assinged."
guide_to_allele, reindexed_df = self.reindex_allele_df(
allele_counts_selected
allele_counts_selected, allele_col
) # TODO: Fix this for combining different sorting scheme
self.n_max_alleles = (
reindexed_df.index.get_level_values("allele_id_for_guide").max() + 1
) # include no edit allele

if edit_index is None:
self.edit_index = get_edit_to_index_dict(guide_to_allele.aa_allele)
self.edit_index = get_edit_to_index_dict(guide_to_allele[allele_col])
else:
self.edit_index = edit_index
self.n_edits = len(self.edit_index.keys())

self.allele_to_edit = self.get_allele_to_edit_tensor(
self.screen_control, self.edit_index, guide_to_allele
self.screen_control, self.edit_index, guide_to_allele, allele_col
)
assert self.allele_to_edit.shape == (
self.n_guides,
Expand Down Expand Up @@ -627,6 +627,7 @@ def get_allele_to_edit_tensor(
screen,
edits_to_index: Dict[str, int],
guide_allele_id_to_allele_df: pd.DataFrame,
allele_col="aa_allele",
) -> torch.Tensor:
"""
Convert (guide, allele_id_for_guide) -> allele DataFrame into the tensor with shape (n_guides, n_max_alleles_per_guide, n_edits) tensor.
Expand All @@ -638,11 +639,14 @@ def get_allele_to_edit_tensor(
Returns
allele_edit_assignment: Binary tensor of shape (n_guides, n_max_alleles_per_guide, n_edits). allele_edit_assignment(i, j, k) is 1 if jth allele of ith guide has kth edit.
"""
guide_allele_id_to_allele_df["edits"] = (
guide_allele_id_to_allele_df.aa_allele.map(
lambda a: list(a.aa_allele.edits) + list(a.nt_allele.edits)
)
)
if allele_col == "aa_allele":
guide_allele_id_to_allele_df["edits"] = guide_allele_id_to_allele_df[
allele_col
].map(lambda a: list(a.aa_allele.edits) + list(a.nt_allele.edits))
else:
guide_allele_id_to_allele_df["edits"] = guide_allele_id_to_allele_df[
allele_col
].map(lambda a: list(a.edits))
guide_allele_id_to_allele_df = guide_allele_id_to_allele_df.reset_index()
guide_allele_id_to_allele_df["edit_idx"] = (
guide_allele_id_to_allele_df.edits.map(
Expand All @@ -663,7 +667,7 @@ def get_allele_to_edit_tensor(
allele_edit_assignment[i, j, guide_allele_id_to_edit_df.iloc[i, j]] = 1
return allele_edit_assignment

def reindex_allele_df(self, alleles_df):
def reindex_allele_df(self, alleles_df, allele_col):
"""
Input: Dataframe of (guide, allele) -> (per sample count)
Output:
Expand All @@ -676,7 +680,7 @@ def reindex_allele_df(self, alleles_df):
global_allele_id: global unique id for each (guide, allele) pair.
"""
guide_to_allele = dict(
list(alleles_df[["guide", "aa_allele"]].groupby("guide").aa_allele)
list(alleles_df[["guide", allele_col]].groupby("guide")[allele_col])
)
dfs = []
for k, s in guide_to_allele.items():
Expand All @@ -690,14 +694,12 @@ def reindex_allele_df(self, alleles_df):

guide_to_allele_tbl = pd.concat(dfs)

alleles_df = pd.merge(
alleles_df, guide_to_allele_tbl, on=["aa_allele", "guide"]
)
alleles_df = pd.merge(alleles_df, guide_to_allele_tbl, on=[allele_col, "guide"])
reindexed_df = alleles_df.reset_index().set_index(
["guide", "allele_id_for_guide"]
)
guide_allele_id_to_allele = reindexed_df[["index", "aa_allele"]]
reindexed_allele_df = reindexed_df.drop(["aa_allele", "index"], axis=1)
guide_allele_id_to_allele = reindexed_df[["index", allele_col]]
reindexed_allele_df = reindexed_df.drop([allele_col, "index"], axis=1)
return (guide_allele_id_to_allele, reindexed_allele_df)

def transform_allele(self, adata, reindexed_df):
Expand Down
9 changes: 6 additions & 3 deletions bean/preprocessing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,12 @@ def get_edit_to_index_dict(cnalleles: pd.Series) -> Dict[str, int]:
Arguments
cnalleles -- pd.Series object of CodingNoncodingAllele objects.
"""
edit_lists = cnalleles.map(
lambda a: list(a.aa_allele.edits) + list(a.nt_allele.edits)
)
try:
edit_lists = cnalleles.map(
lambda a: list(a.aa_allele.edits) + list(a.nt_allele.edits)
)
except AttributeError:
edit_lists = cnalleles.map(lambda a: list(a.edits))
edits = pd.Series(
pd.Series(
[e.get_abs_edit() for l in edit_lists.tolist() for e in l], dtype="object"
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

setup(
name="crispr-bean",
version="1.2.4",
version="1.2.5",
python_requires=">=3.8.0",
author="Jayoung Ryu",
author_email="[email protected]",
Expand Down
21 changes: 17 additions & 4 deletions tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def test_run_tiling_with_negctrl_uniform():
# Add fit_negctrl examples


@pytest.mark.order(417)
@pytest.mark.order(427)
def test_survival_run_variant_noacc():
cmd = "bean run survival variant tests/data/survival_var_mini_screen_masked.h5ad -o tests/test_res/var/ --n-iter 10 --control-condition=D7"
try:
Expand All @@ -174,7 +174,7 @@ def test_survival_run_variant_noacc():
raise exc


@pytest.mark.order(418)
@pytest.mark.order(428)
def test_survival_run_variant_wo_negctrl_uniform():
cmd = "bean run survival variant tests/data/survival_var_mini_screen_masked.h5ad -o tests/test_res/var/ --uniform-edit --n-iter 10 --control-condition=D7"
try:
Expand All @@ -187,7 +187,7 @@ def test_survival_run_variant_wo_negctrl_uniform():
raise exc


@pytest.mark.order(420)
@pytest.mark.order(429)
def test_survival_run_variant_noacc_negctrl():
cmd = "bean run survival variant tests/data/survival_var_mini_screen_masked.h5ad -o tests/test_res/var/ --fit-negctrl --n-iter 10 --control-condition=D7"
try:
Expand All @@ -200,7 +200,7 @@ def test_survival_run_variant_noacc_negctrl():
raise exc


@pytest.mark.order(421)
@pytest.mark.order(430)
def test_survival_run_variant_uniform_negctrl():
cmd = "bean run survival variant tests/data/survival_var_mini_screen_masked.h5ad -o tests/test_res/var/ --uniform-edit --fit-negctrl --n-iter 10 --control-condition=D7"
try:
Expand All @@ -211,3 +211,16 @@ def test_survival_run_variant_uniform_negctrl():
)
except subprocess.CalledProcessError as exc:
raise exc


@pytest.mark.order(431)
def test_run_tiling_no_translation():
    """Smoke-test `bean run sorting tiling` with `--allele-df-key allele_counts`.

    The command is run through the shell against the annotated tiling mini
    screen. `subprocess.check_output` raises `subprocess.CalledProcessError`
    when the command exits with a non-zero status, which fails the test; no
    assertion is made on the captured output.
    """
    cmd = "bean run sorting tiling tests/data/tiling_mini_screen_annotated.h5ad -o tests/test_res/tiling/ --control-guide-tag None --repguide-mask None --n-iter 10 --allele-df-key allele_counts"
    # No try/except here: catching CalledProcessError only to re-raise it
    # unchanged adds nothing, so the error is left to propagate directly.
    subprocess.check_output(
        cmd,
        shell=True,
        universal_newlines=True,
    )

0 comments on commit 5738498

Please sign in to comment.