From d7eaa055eaaf54edd88fa2ede64f73dcdf4b0823 Mon Sep 17 00:00:00 2001 From: jykr Date: Fri, 29 Mar 2024 22:09:17 -0400 Subject: [PATCH] move files --- bean/cli/__init__.py | 0 bin/bean-count => bean/cli/count.py | 195 ++++++++---------- .../cli/count_samples.py | 73 ++----- .../cli/create_screen.py | 20 +- bean/cli/execute.py | 60 ++++++ bin/bean-filter => bean/cli/filter.py | 41 ++-- bin/bean-profile => bean/cli/profile.py | 18 +- bin/bean-qc => bean/cli/qc.py | 102 ++++----- bin/bean-run => bean/cli/run.py | 21 +- bin/bean | 5 + 10 files changed, 290 insertions(+), 245 deletions(-) create mode 100755 bean/cli/__init__.py rename bin/bean-count => bean/cli/count.py (77%) mode change 100755 => 100644 rename bin/bean-count-samples => bean/cli/count_samples.py (77%) mode change 100755 => 100644 rename bin/bean-create-screen => bean/cli/create_screen.py (65%) mode change 100755 => 100644 create mode 100755 bean/cli/execute.py rename bin/bean-filter => bean/cli/filter.py (87%) mode change 100755 => 100644 rename bin/bean-profile => bean/cli/profile.py (70%) mode change 100755 => 100644 rename bin/bean-qc => bean/cli/qc.py (73%) mode change 100755 => 100644 rename bin/bean-run => bean/cli/run.py (93%) mode change 100755 => 100644 create mode 100755 bin/bean diff --git a/bean/cli/__init__.py b/bean/cli/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/bin/bean-count b/bean/cli/count.py old mode 100755 new mode 100644 similarity index 77% rename from bin/bean-count rename to bean/cli/count.py index 49a7153..73cf1a9 --- a/bin/bean-count +++ b/bean/cli/count.py @@ -1,105 +1,90 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -"""Count guides, optionally with reporter and alleles of a single sequencing sample.""" - -import logging -import os -import sys - -import bean -from bean.mapping.utils import ( - _check_arguments, - _get_input_parser, - _check_file, - _get_first_read_length, - _check_read_length, -) - -logging.basicConfig( - level=logging.INFO, - 
format="%(levelname)-5s @ %(asctime)s:\n\t %(message)s \n", - datefmt="%a, %d %b %Y %H:%M:%S", - stream=sys.stderr, - filemode="w", -) -error = logging.critical -warn = logging.warning -debug = logging.debug -info = logging.info - - -_ROOT = os.path.abspath(os.path.dirname(__file__)) - - -def get_input_parser(): - """Get single-sample specific argument parser.""" - parser = _get_input_parser() - parser.add_argument( - "--R1", - type=str, - help="fastq file for read 1", - required=True, - default="Fastq filename", - ) - parser.add_argument( - "--R2", - type=str, - help="fastq file for read 2, sorted as the same name order as in --R1 file.", - required=True, - default="Fastq filename", - ) - return parser - - -def check_arguments(args, info_logger, warn_logger, error_logger): - args = _check_arguments( - args, info_logger=info, warn_logger=warn, error_logger=error - ) - _check_file(args.R1) - _check_file(args.R2) - read_length = _get_first_read_length(args.R1) - _check_read_length(args, read_length, warn) - return args - - -def main(): - parser = get_input_parser() - args = parser.parse_args() - - args = check_arguments(args, info_logger=info, warn_logger=warn, error_logger=error) - args_dict = vars(args) - - edited_from = args_dict["edited_base"] - match_target_pos = args_dict["match_target_pos"] - - counter = bean.mp.GuideEditCounter(**args_dict) - counter.check_filter_fastq() - - counter.get_counts() - if counter.count_reporter_edits: - counter.screen.uns["allele_counts"] = counter.screen.uns["allele_counts"].loc[ - counter.screen.uns["allele_counts"].allele.map(str) != "", : - ] - if match_target_pos: - base_editing_map = {"A": "G", "C": "T"} - edited_to = base_editing_map[edited_from] - counter.screen.get_edit_mat_from_uns( - edited_from, edited_to, match_target_pos - ) - counter.screen.write(f"{counter.output_dir}.h5ad") - counter.screen.to_Excel(f"{counter.output_dir}.xlsx") - info(f"Output written at:\n {counter.output_dir}.h5ad,\n {counter.output_dir}.xlsx") 
def check_arguments(args, info_logger, warn_logger, error_logger):
    """Validate the parsed arguments of the single-sample ``count`` command.

    The namespace is first passed through the shared ``_check_arguments``
    validation, then both FASTQ paths are handed to ``_check_file`` and the
    first read length of ``args.R1`` is checked with ``_check_read_length``.

    Args:
        args: Parsed argument namespace; must provide ``R1`` and ``R2``.
        info_logger: Callable used for informational messages.
        warn_logger: Callable used for warnings.
        error_logger: Callable used for errors.

    Returns:
        The namespace returned by ``_check_arguments``.
    """
    # Forward the caller-supplied loggers.  The previous body always passed
    # the module-level ``info``/``warn``/``error`` aliases, which silently
    # ignored these parameters.
    args = _check_arguments(
        args,
        info_logger=info_logger,
        warn_logger=warn_logger,
        error_logger=error_logger,
    )
    _check_file(args.R1)
    _check_file(args.R2)
    read_length = _get_first_read_length(args.R1)
    _check_read_length(args, read_length, warn_logger)
    return args


def main(args):
    """Run ``bean count`` for one sequencing sample.

    Validates ``args``, builds a ``bean.mp.GuideEditCounter`` from them,
    counts, and writes the resulting screen next to ``counter.output_dir`` as
    ``.h5ad`` and ``.xlsx``.

    Args:
        args: Namespace already parsed by the ``count`` subcommand parser.
            Besides the counter options it must provide ``edited_base`` and
            ``match_target_pos``.
    """
    # NOTE(review): the banner's internal spacing was collapsed in the text
    # under review; restore the original ASCII-art spacing when applying.
    banner = r"""
    _ _
    / \ '\ _
    | \ \ __ ___ _ _ _ _| |_
    \ \ | / _/ _ \ || | ' \ _|
    `.__|/ \__\___/\_,_|_||_\__|
    """
    print(banner)
    args = check_arguments(args, info_logger=info, warn_logger=warn, error_logger=error)
    args_dict = vars(args)

    edited_from = args_dict["edited_base"]
    match_target_pos = args_dict["match_target_pos"]

    counter = bean.mp.GuideEditCounter(**args_dict)
    counter.check_filter_fastq()

    counter.get_counts()
    if counter.count_reporter_edits:
        # Keep only rows whose allele has a non-empty string form.
        allele_counts = counter.screen.uns["allele_counts"]
        counter.screen.uns["allele_counts"] = allele_counts.loc[
            allele_counts.allele.map(str) != "", :
        ]
        if match_target_pos:
            # Only A and C are mapped here; any other ``edited_base`` raises
            # KeyError (presumably rejected earlier by the argument
            # validation -- confirm against ``_check_arguments``).
            base_editing_map = {"A": "G", "C": "T"}
            edited_to = base_editing_map[edited_from]
            counter.screen.get_edit_mat_from_uns(
                edited_from, edited_to, match_target_pos
            )
    counter.screen.write(f"{counter.output_dir}.h5ad")
    counter.screen.to_Excel(f"{counter.output_dir}.xlsx")
    info(f"Output written at:\n {counter.output_dir}.h5ad,\n {counter.output_dir}.xlsx")
    info("All Done!")
    # The banner is printed again on completion, as in the original.
    print(banner)
- + "Formatted as `sample_id,guide_end_seq`", - default=None, - ) - - parser.add_argument( - "--rerun", help="Recount each sample", action="store_true", default=False - ) - - return parser - - def count_sample(R1: str, R2: str, sample_id: str, args: argparse.Namespace): """Count single sample given R1 and R2 paths. Arguments are modified accordingly to the provided sample_id before being passed to GuideEditCounter. @@ -102,7 +59,9 @@ def count_sample(R1: str, R2: str, sample_id: str, args: argparse.Namespace): "barcode_start_seqs_tbl" in args_dict and args_dict["barcode_start_seqs_tbl"] is not None ): - args_dict["barcode_start_seq"] = str(args_dict["barcode_start_seqs_tbl"][sample_id]) + args_dict["barcode_start_seq"] = str( + args_dict["barcode_start_seqs_tbl"][sample_id] + ) counter = bean.mp.GuideEditCounter(**args_dict) if os.path.exists(f"{counter.output_dir}.h5ad") and not args_dict["rerun"]: screen = bean.read_h5ad(f"{counter.output_dir}.h5ad") @@ -147,10 +106,10 @@ def count_sample(R1: str, R2: str, sample_id: str, args: argparse.Namespace): def check_arguments(args: argparse.Namespace) -> argparse.Namespace: """Checks the validity of the argument.""" args = _check_arguments(args, info, warn, error) - sample_tbl = pd.read_csv(args.input) + sample_tbl = pd.read_csv(args.sample_list) if len(sample_tbl.iloc[:, 2].unique()) != len(sample_tbl.iloc[:, 2]): raise InputFileError( - f"Sample ID not unique. Please check your input file {args.input}." + f"Sample ID not unique. Please check your input file {args.sample_list}." 
def main(args):
    """Build a screen object from the parsed arguments and write it as ``.h5ad``.

    Args:
        args: Namespace already parsed by the ``create-screen`` subcommand
            parser.  It is passed unchanged to ``create_screen``; this
            function itself reads ``output_prefix`` and
            ``gRNA_counts_table_csv`` to choose the output path.
    """
    # NOTE(review): the banner's internal spacing appears collapsed in the
    # text under review -- confirm against the original ASCII art.
    print(
        r"""
    _ _
    / \ '\ _
    | \ \ __ _ _ ___ __ _| |_ ___
    \ \ | / _| '_/ -_) _` | _/ -_)
    `.__|/ \__|_| \___\__,_|\__\___|
    """
    )
    screen = create_screen(args)
    info(f"Done obtaining screen:\n{screen}\nWriting result...")
    # Fall back to the guide-count CSV path (minus its extension) when no
    # explicit output prefix was given.
    output_path = f"{args.output_prefix if args.output_prefix else os.path.splitext(args.gRNA_counts_table_csv)[0]}.h5ad"
    screen.write(output_path)
    info(f"Done writing screen object to {output_path}.")
# Parser built by the most recent ``main()`` call; ``None`` until then.
global_parser = None


def main() -> None:
    """Entry point of the ``bean`` executable.

    Builds the top-level parser with ``get_base_parser``, parses
    ``sys.argv`` and hands the resulting namespace to the ``main`` function
    of the selected subcommand.  When no known subcommand was given, the
    top-level help is printed instead.
    """
    # Without this declaration the assignment below only bound a local name
    # and the module-level ``global_parser`` stayed ``None``.
    global global_parser

    # The factory defined in this module is ``get_base_parser``; the former
    # call to ``get_parser()`` raised NameError on every invocation.
    parser = get_base_parser()
    global_parser = parser
    args = parser.parse_args()

    handlers = {
        "count": count,
        "count-samples": count_samples,
        "profile": profile,
        "qc": qc,
        "filter": filter,
        "run": run,
        # The subparser is registered as "create_screen" while dispatch used
        # to test only "create-screen", so the command could never run.
        # Accept both spellings so either registration name works.
        "create-screen": create_screen,
        "create_screen": create_screen,
    }
    handler = handlers.get(args.subcommand)
    if handler is None:
        parser.print_help()
        return
    handler(args)
print("bean-filter: filter alleles") args = check_args(args) if not args.load_tmp: bdata = be.read_h5ad(args.bdata_path) @@ -49,7 +58,7 @@ ( q_val_each, sig_allele_df, - ) = be.an.filter_alleles.filter_alleles( + ) = filter_alleles.filter_alleles( bdata, plasmid_adata, filter_each_sample=True, run_parallel=True ) bdata.uns["sig_allele_counts"] = sig_allele_df.reset_index(drop=True) @@ -59,17 +68,15 @@ print(len(bdata.uns[allele_df_keys[-1]])) if len(bdata.uns[allele_df_keys[-1]]) >= 1: info("Filtering out edits outside spacer position...") - bdata.uns[ - f"{allele_df_keys[-1]}_spacer" - ] = bdata.filter_allele_counts_by_pos( - rel_pos_start=0, - rel_pos_end=20, - rel_pos_is_reporter=False, - map_to_filtered=True, - allele_uns_key=allele_df_keys[-1], - jaccard_threshold=0.2, - ).reset_index( - drop=True + bdata.uns[f"{allele_df_keys[-1]}_spacer"] = ( + bdata.filter_allele_counts_by_pos( + rel_pos_start=0, + rel_pos_end=20, + rel_pos_is_reporter=False, + map_to_filtered=True, + allele_uns_key=allele_df_keys[-1], + jaccard_threshold=0.2, + ).reset_index(drop=True) ) info( f"Filtered down to {len(bdata.uns[f'{allele_df_keys[-1]}_spacer'])} alleles." 
diff --git a/bin/bean-profile b/bean/cli/profile.py old mode 100755 new mode 100644 similarity index 70% rename from bin/bean-profile rename to bean/cli/profile.py index 8b48daa..abb1c87 --- a/bin/bean-profile +++ b/bean/cli/profile.py @@ -5,8 +5,18 @@ from bean.plotting.utils import parse_args, check_args -def main(): - args = parse_args() +def main(args): + print(" \n~~~BEAN Profile~~~") + print("-Profile editing patterns of your editor-") + print( + r""" + _ _ __ _ _ + / \ '\ _ __ _ _ ___ / _(_) |___ + | \ \ | '_ \ '_/ _ \ _| | / -_) + \ \ | | .__/_| \___/_| |_|_\___| + `.__|/ |_| + """ + ) args = check_args(args) os.system( "python -m ipykernel install --user --name bean_python3 --display-name bean_python3" @@ -28,7 +38,3 @@ def main(): os.system( f"jupyter nbconvert --to html {args.output_prefix}_editing_preference.ipynb" ) - - -if __name__ == "__main__": - main() diff --git a/bin/bean-qc b/bean/cli/qc.py old mode 100755 new mode 100644 similarity index 73% rename from bin/bean-qc rename to bean/cli/qc.py index a6880bf..4102ac8 --- a/bin/bean-qc +++ b/bean/cli/qc.py @@ -1,46 +1,56 @@ -#!/usr/bin/env python -import os -import papermill as pm -import bean as be -from bean.qc.utils import parse_args, check_args - - -def main(): - args = parse_args() - args = check_args(args) - os.system( - "python -m ipykernel install --user --name bean_python3 --display-name bean_python3" - ) - pm.execute_notebook( - f"{os.path.dirname(be.__file__)}/../notebooks/sample_quality_report.ipynb", - f"{args.out_report_prefix}.ipynb", - parameters=dict( - bdata_path=args.bdata_path, - out_bdata_path=args.out_screen_path, - tiling=args.tiling, - edit_quantification_start_pos=args.edit_start_pos, - edit_quantification_end_pos=args.edit_end_pos, - target_pos_col=args.target_pos_col, - rel_pos_is_reporter=args.rel_pos_is_reporter, - corr_X_thres=args.count_correlation_thres, - edit_rate_thres=args.edit_rate_thres, - posctrl_col=args.posctrl_col, - posctrl_val=args.posctrl_val, - 
def main(args):
    """Run ``bean qc``: execute the sample-quality notebook and export HTML.

    Fills in default output paths derived from ``args.bdata_path`` when they
    were not given, validates the arguments with ``check_args``, registers
    the ``bean_python3`` Jupyter kernel, executes
    ``notebooks/sample_quality_report.ipynb`` through papermill with the
    parsed options as parameters, and converts the executed notebook to HTML.

    Args:
        args: Namespace already parsed by the ``qc`` subcommand parser.
    """
    print(" \n~~~BEANQC~~~")
    print("-Check guide/sample level quality and mask / discard-")
    # NOTE(review): the banner's internal spacing was collapsed in the text
    # under review; restore the original ASCII-art spacing when applying.
    print(
        r"""
    _ _
    / \ '\ ___ ___
    | \ \ / _ \ / __|
    \ \ | | (_) | (__
    `.__|/ \__\_\\___|
    """
    )
    # Default outputs sit next to the input, with the trailing ".h5ad"
    # replaced by a descriptive suffix.
    if args.out_screen_path is None:
        args.out_screen_path = f"{args.bdata_path.rsplit('.h5ad', 1)[0]}.filtered.h5ad"
    if args.out_report_prefix is None:
        args.out_report_prefix = f"{args.bdata_path.rsplit('.h5ad', 1)[0]}.qc_report"
    args = check_args(args)
    os.system(
        "python -m ipykernel install --user --name bean_python3 --display-name bean_python3"
    )
    pm.execute_notebook(
        f"{os.path.dirname(be.__file__)}/../notebooks/sample_quality_report.ipynb",
        f"{args.out_report_prefix}.ipynb",
        parameters=dict(
            bdata_path=args.bdata_path,
            out_bdata_path=args.out_screen_path,
            tiling=args.tiling,
            edit_quantification_start_pos=args.edit_start_pos,
            edit_quantification_end_pos=args.edit_end_pos,
            target_pos_col=args.target_pos_col,
            rel_pos_is_reporter=args.rel_pos_is_reporter,
            corr_X_thres=args.count_correlation_thres,
            edit_rate_thres=args.edit_rate_thres,
            posctrl_col=args.posctrl_col,
            posctrl_val=args.posctrl_val,
            lfc_thres=args.lfc_thres,
            replicate_label=args.replicate_label,
            condition_label=args.condition_label,
            comp_cond1=args.lfc_cond1,
            comp_cond2=args.lfc_cond2,
            ctrl_cond=args.control_condition,
            exp_id=args.out_report_prefix,
            # Logical negation, not ``~``: bitwise inversion of a bool gives
            # -1 or -2, both truthy, so the flag could never disable
            # recalculation.
            recalculate_edits=not args.dont_recalculate_edits,
            base_edit_data=args.base_edit_data,
            remove_bad_replicates=args.remove_bad_replicates,
        ),
        kernel_name="bean_python3",
    )
    os.system(f"jupyter nbconvert --to html {args.out_report_prefix}.ipynb")