Skip to content

Commit

Permalink
move files
Browse files Browse the repository at this point in the history
  • Loading branch information
jykr committed Mar 30, 2024
1 parent 4f11b82 commit d7eaa05
Show file tree
Hide file tree
Showing 10 changed files with 290 additions and 245 deletions.
Empty file added bean/cli/__init__.py
Empty file.
195 changes: 90 additions & 105 deletions bin/bean-count → bean/cli/count.py
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,105 +1,90 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Count guides, optionally with reporter and alleles of a single sequencing sample."""

import logging
import os
import sys

import bean
from bean.mapping.utils import (
_check_arguments,
_get_input_parser,
_check_file,
_get_first_read_length,
_check_read_length,
)

# Configure root logging once at import time: INFO and above go to stderr
# with a timestamped, multi-line message format.
logging.basicConfig(
    level=logging.INFO,
    format="%(levelname)-5s @ %(asctime)s:\n\t %(message)s \n",
    datefmt="%a, %d %b %Y %H:%M:%S",
    stream=sys.stderr,
    filemode="w",  # NOTE(review): `filemode` is only honored with `filename`; likely ignored here since `stream` is given — confirm
)
# Short module-level aliases used as loggers throughout this script.
error = logging.critical
warn = logging.warning
debug = logging.debug
info = logging.info


# Absolute path of the directory containing this module.
_ROOT = os.path.abspath(os.path.dirname(__file__))


def get_input_parser():
    """Build the argument parser for single-sample guide counting.

    Extends the shared mapping parser with the read-1 / read-2 fastq paths
    required for a single sequencing sample.

    Returns:
        The parser from `_get_input_parser()` with `--R1` and `--R2` added.
    """
    parser = _get_input_parser()
    # Fix: `default` is ignored by argparse when `required=True`; the previous
    # placeholder default="Fastq filename" was dead and misleading, so it is dropped.
    parser.add_argument(
        "--R1",
        type=str,
        help="fastq file for read 1",
        required=True,
    )
    parser.add_argument(
        "--R2",
        type=str,
        help="fastq file for read 2, sorted as the same name order as in --R1 file.",
        required=True,
    )
    return parser


def check_arguments(args, info_logger, warn_logger, error_logger):
    """Validate single-sample arguments and the input fastq files.

    Args:
        args: parsed namespace from `get_input_parser()`.
        info_logger / warn_logger / error_logger: logging callables used for
            progress, warning, and fatal messages.

    Returns:
        The (possibly normalized) argument namespace.
    """
    # Fix: the logger parameters were previously ignored in favor of the
    # module-level globals; pass the supplied callables through as intended.
    # (Both visible call sites pass the module globals, so behavior there is unchanged.)
    args = _check_arguments(
        args,
        info_logger=info_logger,
        warn_logger=warn_logger,
        error_logger=error_logger,
    )
    _check_file(args.R1)
    _check_file(args.R2)
    read_length = _get_first_read_length(args.R1)
    _check_read_length(args, read_length, warn_logger)
    return args


def main():
    """CLI entry point: count guides, optionally with reporter edits and alleles, for one sample."""
    parser = get_input_parser()
    args = parser.parse_args()

    args = check_arguments(args, info_logger=info, warn_logger=warn, error_logger=error)
    args_dict = vars(args)

    # Base-editing parameters used below when building the edit matrix.
    edited_from = args_dict["edited_base"]
    match_target_pos = args_dict["match_target_pos"]

    counter = bean.mp.GuideEditCounter(**args_dict)
    counter.check_filter_fastq()

    counter.get_counts()
    if counter.count_reporter_edits:
        # Drop allele-count rows whose allele serializes to the empty string.
        counter.screen.uns["allele_counts"] = counter.screen.uns["allele_counts"].loc[
            counter.screen.uns["allele_counts"].allele.map(str) != "", :
        ]
        # NOTE(review): this `if` is assumed to be nested under
        # `count_reporter_edits` (source indentation was lost in extraction) —
        # confirm against the upstream repository.
        if match_target_pos:
            base_editing_map = {"A": "G", "C": "T"}
            edited_to = base_editing_map[edited_from]
            counter.screen.get_edit_mat_from_uns(
                edited_from, edited_to, match_target_pos
            )
    counter.screen.write(f"{counter.output_dir}.h5ad")
    counter.screen.to_Excel(f"{counter.output_dir}.xlsx")
    info(f"Output written at:\n {counter.output_dir}.h5ad,\n {counter.output_dir}.xlsx")
    info("All Done!")
    print(
        r"""
_ _
/ \ '\ _
| \ \ __ ___ _ _ _ _| |_
\ \ | / _/ _ \ || | ' \ _|
`.__|/ \__\___/\_,_|_||_\__|
"""
    )


if __name__ == "__main__":
    main()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Count guides, optionally with reporter and alleles of a single sequencing sample."""

import logging
import os
import sys

import bean
from bean.mapping.utils import (
_check_arguments,
_check_file,
_get_first_read_length,
_check_read_length,
)

# Configure root logging once at import time: INFO and above go to stderr
# with a timestamped, multi-line message format.
logging.basicConfig(
    level=logging.INFO,
    format="%(levelname)-5s @ %(asctime)s:\n\t %(message)s \n",
    datefmt="%a, %d %b %Y %H:%M:%S",
    stream=sys.stderr,
    filemode="w",  # NOTE(review): `filemode` is only honored with `filename`; likely ignored here since `stream` is given — confirm
)
# Short module-level aliases used as loggers throughout this script.
error = logging.critical
warn = logging.warning
debug = logging.debug
info = logging.info


# Absolute path of the directory containing this module.
_ROOT = os.path.abspath(os.path.dirname(__file__))





def check_arguments(args, info_logger, warn_logger, error_logger):
    """Validate single-sample arguments and the input fastq files.

    Args:
        args: parsed argparse namespace supplied by the CLI wrapper.
        info_logger / warn_logger / error_logger: logging callables used for
            progress, warning, and fatal messages.

    Returns:
        The (possibly normalized) argument namespace.
    """
    # Fix: the logger parameters were previously ignored in favor of the
    # module-level globals; pass the supplied callables through as intended.
    # (The visible call site passes the module globals, so its behavior is unchanged.)
    args = _check_arguments(
        args,
        info_logger=info_logger,
        warn_logger=warn_logger,
        error_logger=error_logger,
    )
    _check_file(args.R1)
    _check_file(args.R2)
    read_length = _get_first_read_length(args.R1)
    _check_read_length(args, read_length, warn_logger)
    return args


def main(args):
    """Count guides, optionally with reporter edits and alleles, for one sample.

    Expects an already-parsed argparse namespace (argument parsing is owned by
    the CLI wrapper). Writes the resulting screen to `<output_dir>.h5ad` and
    `<output_dir>.xlsx`.
    """
    # Banner printed at subcommand start.
    print(
        r"""
_ _
/ \ '\ _
| \ \ __ ___ _ _ _ _| |_
\ \ | / _/ _ \ || | ' \ _|
`.__|/ \__\___/\_,_|_||_\__|
"""
    )
    args = check_arguments(args, info_logger=info, warn_logger=warn, error_logger=error)
    args_dict = vars(args)

    # Base-editing parameters used below when building the edit matrix.
    edited_from = args_dict["edited_base"]
    match_target_pos = args_dict["match_target_pos"]

    counter = bean.mp.GuideEditCounter(**args_dict)
    counter.check_filter_fastq()

    counter.get_counts()
    if counter.count_reporter_edits:
        # Drop allele-count rows whose allele serializes to the empty string.
        counter.screen.uns["allele_counts"] = counter.screen.uns["allele_counts"].loc[
            counter.screen.uns["allele_counts"].allele.map(str) != "", :
        ]
        # NOTE(review): this `if` is assumed to be nested under
        # `count_reporter_edits` (source indentation was lost in extraction) —
        # confirm against the upstream repository.
        if match_target_pos:
            base_editing_map = {"A": "G", "C": "T"}
            edited_to = base_editing_map[edited_from]
            counter.screen.get_edit_mat_from_uns(
                edited_from, edited_to, match_target_pos
            )
    counter.screen.write(f"{counter.output_dir}.h5ad")
    counter.screen.to_Excel(f"{counter.output_dir}.xlsx")
    info(f"Output written at:\n {counter.output_dir}.h5ad,\n {counter.output_dir}.xlsx")
    info("All Done!")
    # NOTE(review): the banner is printed again here as at entry — possibly a
    # leftover from the pre-refactor script; confirm whether both are intended.
    print(
        r"""
_ _
/ \ '\ _
| \ \ __ ___ _ _ _ _| |_
\ \ | / _/ _ \ || | ' \ _|
`.__|/ \__\___/\_,_|_||_\__|
"""
    )
73 changes: 18 additions & 55 deletions bin/bean-count-samples → bean/cli/count_samples.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from bean.mapping.utils import (
InputFileError,
_check_arguments,
_get_input_parser,
_get_first_read_length,
_check_read_length,
)
Expand All @@ -32,48 +31,6 @@
info = logging.info


def get_input_parser() -> argparse.ArgumentParser:
    """Add multi-sample specific arguments to the base parser.

    Returns:
        argparse.ArgumentParser: the shared mapping parser extended with the
        sample list, threading, rerun, and per-sample sequence-table options.
        (Fixed: the previous annotation claimed `argparse.Namespace`, but a
        parser — not a parsed namespace — is returned.)
    """
    parser = _get_input_parser()
    parser.add_argument(
        "-i",
        "--input",
        type=str,
        help="List of fastq and sample ids. Formatted as `R1_filepath,R2_filepath,sample_id`",
        required=True,
    )
    parser.add_argument(
        "-t", "--threads", type=int, help="Number of threads", default=10
    )
    # Help strings below were previously concatenated without a separating
    # space ("...to be used.Formatted as...") — fixed.
    parser.add_argument(
        "--guide-start-seqs-file",
        type=str,
        help="CSV file path with per-sample `guide_start_seq` to be used. "
        "Formatted as `sample_id,guide_start_seq`",
        default=None,
    )
    parser.add_argument(
        "--guide-end-seqs-file",
        type=str,
        help="CSV file path with per-sample `guide_end_seq` to be used. "
        "Formatted as `sample_id,guide_end_seq`",
        default=None,
    )
    parser.add_argument(
        "--barcode-start-seqs-file",
        type=str,
        # Fixed copy-paste error: this option's format previously said
        # `sample_id,guide_end_seq`; it documents `barcode_start_seq`.
        help="CSV file path with per-sample `barcode_start_seq` to be used. "
        "Formatted as `sample_id,barcode_start_seq`",
        default=None,
    )

    parser.add_argument(
        "--rerun", help="Recount each sample", action="store_true", default=False
    )

    return parser


def count_sample(R1: str, R2: str, sample_id: str, args: argparse.Namespace):
"""Count single sample given R1 and R2 paths.
Arguments are modified accordingly to the provided sample_id before being passed to GuideEditCounter.
Expand Down Expand Up @@ -102,7 +59,9 @@ def count_sample(R1: str, R2: str, sample_id: str, args: argparse.Namespace):
"barcode_start_seqs_tbl" in args_dict
and args_dict["barcode_start_seqs_tbl"] is not None
):
args_dict["barcode_start_seq"] = str(args_dict["barcode_start_seqs_tbl"][sample_id])
args_dict["barcode_start_seq"] = str(
args_dict["barcode_start_seqs_tbl"][sample_id]
)
counter = bean.mp.GuideEditCounter(**args_dict)
if os.path.exists(f"{counter.output_dir}.h5ad") and not args_dict["rerun"]:
screen = bean.read_h5ad(f"{counter.output_dir}.h5ad")
Expand Down Expand Up @@ -147,10 +106,10 @@ def count_sample(R1: str, R2: str, sample_id: str, args: argparse.Namespace):
def check_arguments(args: argparse.Namespace) -> argparse.Namespace:
"""Checks the validity of the argument."""
args = _check_arguments(args, info, warn, error)
sample_tbl = pd.read_csv(args.input)
sample_tbl = pd.read_csv(args.sample_list)
if len(sample_tbl.iloc[:, 2].unique()) != len(sample_tbl.iloc[:, 2]):
raise InputFileError(
f"Sample ID not unique. Please check your input file {args.input}."
f"Sample ID not unique. Please check your input file {args.sample_list}."
)
first_read_lengths = [
_get_first_read_length(fastq_R1) for fastq_R1 in sample_tbl.iloc[:, 0]
Expand Down Expand Up @@ -187,11 +146,19 @@ def _check_return_guide_seqs_tbl(guide_seqs_file, sample_tbl, label):
return args


def main():
parser = get_input_parser()
args = parser.parse_args()
def main(args):
"""Get the input data"""
print(
r"""
_ _
/ \ '\ _
| \ \ __ ___ _ _ _ _| |_
\ \ | / _/ _ \ || | ' \ _|
`.__|/ \__\___/\_,_|_||_\__|
"""
)
args = check_arguments(args)
sample_tbl = pd.read_csv(args.input) # R1_filepath, R2_filepath, sample_name
sample_tbl = pd.read_csv(args.sample_list) # R1_filepath, R2_filepath, sample_name
sample_tbl_input = sample_tbl.iloc[:, :3]
sample_info_tbl = sample_tbl.iloc[:, 2:].set_index(sample_tbl.columns[2])
with Pool(processes=args.threads, maxtasksperchild=1) as p:
Expand All @@ -205,7 +172,7 @@ def main():
# result = p.starmap(count_sample, sample_tbl[0], sample_tbl[1], sample_tbl[2])

screen = bean.concat(result, axis=1)
database_id = args.name or os.path.basename(args.input).split(".")[0]
database_id = args.name or os.path.basename(args.sample_list).split(".")[0]
output_path = os.path.join(
os.path.abspath(args.output_folder), f"bean_count_{database_id}"
)
Expand All @@ -230,7 +197,3 @@ def main():
`.__|/ \__\___/\_,_|_||_\__|
"""
)


if __name__ == "__main__":
main()
20 changes: 12 additions & 8 deletions bin/bean-create-screen → bean/cli/create_screen.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
import sys
import logging
from bean.framework.read_from_csvs import get_input_parser, create_screen
from bean.framework.read_from_csvs import create_screen

logging.basicConfig(
level=logging.INFO,
Expand All @@ -19,15 +19,19 @@
info = logging.info


def main():
parser = get_input_parser()
args = parser.parse_args()
def main(args):
    """Create a screen object from the input CSV tables and write it to `.h5ad`.

    Expects an already-parsed argparse namespace (argument parsing is owned by
    the CLI wrapper). (Fixed: the previous docstring was copy-pasted from a
    parser-building function and did not describe this entry point.)
    """
    # Banner for the `create-screen` subcommand.
    print(
        r"""
_ _
/ \ '\ _
| \ \ __ _ _ ___ __ _| |_ ___
\ \ | / _| '_/ -_) _` | _/ -_)
`.__|/ \__|_| \___\__,_|\__\___|
"""
    )
    screen = create_screen(args)
    info(f"Done obtaining screen:\n{screen}\nWriting result...")
    # Output name: `--output-prefix` if given, else the gRNA counts table's
    # basename, with an `.h5ad` extension.
    output_path = f"{args.output_prefix if args.output_prefix else os.path.splitext(args.gRNA_counts_table_csv)[0]}.h5ad"
    screen.write(output_path)
    info(f"Done writing screen object to {output_path}.")


if __name__ == "__main__":
main()
Loading

0 comments on commit d7eaa05

Please sign in to comment.