diff --git a/bean/annotate/_supporting_fn.py b/bean/annotate/_supporting_fn.py index 127964b..e3b34f5 100644 --- a/bean/annotate/_supporting_fn.py +++ b/bean/annotate/_supporting_fn.py @@ -1,5 +1,5 @@ from copy import deepcopy -from typing import List, Tuple +from typing import List, Tuple, Union from tqdm.auto import tqdm from ..framework.Edit import Edit, Allele from ..framework.AminoAcidEdit import CodingNoncodingAllele @@ -43,14 +43,18 @@ def filter_allele_by_pos( def filter_allele_by_base( allele: Allele, allowed_base_changes: List[Tuple] = None, - allowed_ref_base: str = None, - allowed_alt_base: str = None, + allowed_ref_base: Union[List, str] = None, + allowed_alt_base: Union[List, str] = None, ): """ Filter alleles based on position and return the filtered allele and number of filtered edits. """ filtered_edits = 0 + if isinstance(allowed_ref_base, str): + allowed_ref_base = [allowed_ref_base] + if isinstance(allowed_alt_base, str): + allowed_alt_base = [allowed_alt_base] if ( not (allowed_ref_base is None and allowed_alt_base is None) + (allowed_base_changes is None) @@ -64,15 +68,15 @@ def filter_allele_by_base( allele.edits.remove(edit) elif not allowed_ref_base is None: for edit in allele.edits.copy(): - if edit.ref_base != allowed_ref_base: + if edit.ref_base not in allowed_ref_base: filtered_edits += 1 allele.edits.remove(edit) - elif not allowed_alt_base is None and edit.alt_base != allowed_alt_base: + elif not allowed_alt_base is None and edit.alt_base not in allowed_alt_base: filtered_edits += 1 allele.edits.remove(edit) else: for edit in allele.edits.copy(): - if edit.alt_base != allowed_alt_base: + if edit.alt_base not in allowed_alt_base: filtered_edits += 1 allele.edits.remove(edit) return (allele, filtered_edits) @@ -145,7 +149,6 @@ def _map_alleles_to_filtered( raw_allele_counts.groupby("guide"), desc="Mapping alleles to closest filtered alleles", ): - guide_filtered_allele_counts = filtered_allele_counts.loc[ filtered_allele_counts.guide == guide, : ].set_index(allele_col) diff --git a/bean/annotate/translate_allele.py b/bean/annotate/translate_allele.py index b82414a..6310717 100644 --- a/bean/annotate/translate_allele.py +++ b/bean/annotate/translate_allele.py @@ -175,8 +175,8 @@ def _translate_single_codon( ) -> str: # nt_seq_string: str, aa_pos: int) -> str: """Translate `aa_pos`-th codon of `nt_seq_string`.""" if len(codon) != 3: - print("reached the end of CDS, frameshift.") - return "/" + raise ValueError("reached the end of CDS, frameshift.") + return ">" try: codon = "".join(codon) return codon_map[codon] @@ -302,7 +302,7 @@ def edit_single(self, edit_str): ) ) raise RefBaseMismatchException( - f"{self.gene_name + ';' if hasattr(self, 'gene_name') else ''}ref:{self.nt[rel_pos]} at pos {rel_pos}, got edit {edit}. Gene sequence: {''.join(self.nt)}" + f"{self.gene_name + ';' if hasattr(self, 'gene_name') else ''}ref:{self.nt[rel_pos]} at pos {rel_pos}, got edit {edit}." ) else: self.edited_nt[rel_pos] = alt_base diff --git a/bean/annotate/utils.py b/bean/annotate/utils.py index 97fe49b..2252187 100644 --- a/bean/annotate/utils.py +++ b/bean/annotate/utils.py @@ -280,6 +280,12 @@ def parse_args(): help="Only consider edit within window provided by (edit-start-pos, edit-end-pos). If this flag is not provided, `--edit-start-pos` and `--edit-end-pos` flags are ignored.", action="store_true", ) + parser.add_argument( + "--keep-indels", + "-i", + help="Include indels.", + action="store_true", + ) parser.add_argument( "--filter-target-basechange", "-b", diff --git a/bean/framework/ReporterScreen.py b/bean/framework/ReporterScreen.py index 37631d5..ce56061 100644 --- a/bean/framework/ReporterScreen.py +++ b/bean/framework/ReporterScreen.py @@ -333,7 +333,7 @@ def __getitem__(self, index): lambda slist: ".".join(slist) ) new_uns[k] = df.loc[guides_include, adata.var._rc.unique()] - #adata.var.pop("_rc") + # adata.var.pop("_rc") else: new_uns[k] = df.loc[guides_include, adata.var.rep.unique()] if not isinstance(df, pd.DataFrame): @@ -697,8 +697,8 @@ def filter_allele_counts_by_pos( def filter_allele_counts_by_base( self, - ref_base="A", - alt_base="G", + ref_base: Union[List, str] = "A", + alt_base: Union[List, str] = "G", allele_uns_key="allele_counts", map_to_filtered=True, jaccard_threshold: float = 0.5, diff --git a/bin/bean-filter b/bin/bean-filter index 7e6f013..128ef42 100644 --- a/bin/bean-filter +++ b/bin/bean-filter @@ -100,6 +100,18 @@ if __name__ == "__main__": allele_df_keys.append(filtered_key) info(f"Filtered down to {len(bdata.uns[filtered_key])} alleles.") + if len(bdata.uns[allele_df_keys[-1]]) > 0 and not args.keep_indels: + filtered_key = f"{allele_df_keys[-1]}_noindels" + info(f"Filtering out indels...") + bdata.uns[filtered_key] = bdata.filter_allele_counts_by_base( + ["A", "T", "G", "C"], + ["A", "T", "G", "C"], + map_to_filtered=False, + allele_uns_key=allele_df_keys[-1], + ).reset_index(drop=True) + info(f"Filtered down to {len(bdata.uns[filtered_key])} alleles.") + allele_df_keys.append(filtered_key) + if len(bdata.uns[allele_df_keys[-1]]) > 0 and args.filter_target_basechange: filtered_key = ( f"{allele_df_keys[-1]}_{bdata.base_edited_from}.{bdata.base_edited_to}"