Skip to content

Commit

Permalink
move files
Browse files Browse the repository at this point in the history
  • Loading branch information
jykr committed Mar 30, 2024
1 parent 4f11b82 commit d7eaa05
Show file tree
Hide file tree
Showing 10 changed files with 290 additions and 245 deletions.
Empty file added bean/cli/__init__.py
Empty file.
195 changes: 90 additions & 105 deletions bin/bean-count → bean/cli/count.py
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,105 +1,90 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Count guides, optionally with reporter and alleles of a single sequencing sample."""

import logging
import os
import sys

import bean
from bean.mapping.utils import (
_check_arguments,
_get_input_parser,
_check_file,
_get_first_read_length,
_check_read_length,
)

# Configure root logging once at import time: INFO and above go to stderr
# with a timestamped, multi-line message format.
logging.basicConfig(
    level=logging.INFO,
    format="%(levelname)-5s @ %(asctime)s:\n\t %(message)s \n",
    datefmt="%a, %d %b %Y %H:%M:%S",
    stream=sys.stderr,
    filemode="w",  # NOTE(review): `filemode` is only honored with `filename`; likely ignored here since `stream` is given — confirm
)
# Short module-level aliases used as loggers throughout this script.
error = logging.critical
warn = logging.warning
debug = logging.debug
info = logging.info


# Absolute path of the directory containing this module.
_ROOT = os.path.abspath(os.path.dirname(__file__))


def get_input_parser():
    """Build the argument parser for single-sample guide counting.

    Extends the shared mapping parser with the read-1 / read-2 fastq paths
    required for a single sequencing sample.

    Returns:
        The parser from `_get_input_parser()` with `--R1` and `--R2` added.
    """
    parser = _get_input_parser()
    # Fix: `default` is ignored by argparse when `required=True`; the previous
    # placeholder default="Fastq filename" was dead and misleading, so it is dropped.
    parser.add_argument(
        "--R1",
        type=str,
        help="fastq file for read 1",
        required=True,
    )
    parser.add_argument(
        "--R2",
        type=str,
        help="fastq file for read 2, sorted as the same name order as in --R1 file.",
        required=True,
    )
    return parser


def check_arguments(args, info_logger, warn_logger, error_logger):
    """Validate single-sample arguments and the input fastq files.

    Args:
        args: parsed namespace from `get_input_parser()`.
        info_logger / warn_logger / error_logger: logging callables used for
            progress, warning, and fatal messages.

    Returns:
        The (possibly normalized) argument namespace.
    """
    # Fix: the logger parameters were previously ignored in favor of the
    # module-level globals; pass the supplied callables through as intended.
    # (Both visible call sites pass the module globals, so behavior there is unchanged.)
    args = _check_arguments(
        args,
        info_logger=info_logger,
        warn_logger=warn_logger,
        error_logger=error_logger,
    )
    _check_file(args.R1)
    _check_file(args.R2)
    read_length = _get_first_read_length(args.R1)
    _check_read_length(args, read_length, warn_logger)
    return args


def main():
    """CLI entry point: count guides, optionally with reporter edits and alleles, for one sample."""
    parser = get_input_parser()
    args = parser.parse_args()

    args = check_arguments(args, info_logger=info, warn_logger=warn, error_logger=error)
    args_dict = vars(args)

    # Base-editing parameters used below when building the edit matrix.
    edited_from = args_dict["edited_base"]
    match_target_pos = args_dict["match_target_pos"]

    counter = bean.mp.GuideEditCounter(**args_dict)
    counter.check_filter_fastq()

    counter.get_counts()
    if counter.count_reporter_edits:
        # Drop allele-count rows whose allele serializes to the empty string.
        counter.screen.uns["allele_counts"] = counter.screen.uns["allele_counts"].loc[
            counter.screen.uns["allele_counts"].allele.map(str) != "", :
        ]
        # NOTE(review): this `if` is assumed to be nested under
        # `count_reporter_edits` (source indentation was lost in extraction) —
        # confirm against the upstream repository.
        if match_target_pos:
            base_editing_map = {"A": "G", "C": "T"}
            edited_to = base_editing_map[edited_from]
            counter.screen.get_edit_mat_from_uns(
                edited_from, edited_to, match_target_pos
            )
    counter.screen.write(f"{counter.output_dir}.h5ad")
    counter.screen.to_Excel(f"{counter.output_dir}.xlsx")
    info(f"Output written at:\n {counter.output_dir}.h5ad,\n {counter.output_dir}.xlsx")
    info("All Done!")
    print(
        r"""
_ _
/ \ '\ _
| \ \ __ ___ _ _ _ _| |_
\ \ | / _/ _ \ || | ' \ _|
`.__|/ \__\___/\_,_|_||_\__|
"""
    )


if __name__ == "__main__":
    main()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Count guides, optionally with reporter and alleles of a single sequencing sample."""

import logging
import os
import sys

import bean
from bean.mapping.utils import (
_check_arguments,
_check_file,
_get_first_read_length,
_check_read_length,
)

# Configure root logging once at import time: INFO and above go to stderr
# with a timestamped, multi-line message format.
logging.basicConfig(
    level=logging.INFO,
    format="%(levelname)-5s @ %(asctime)s:\n\t %(message)s \n",
    datefmt="%a, %d %b %Y %H:%M:%S",
    stream=sys.stderr,
    filemode="w",  # NOTE(review): `filemode` is only honored with `filename`; likely ignored here since `stream` is given — confirm
)
# Short module-level aliases used as loggers throughout this script.
error = logging.critical
warn = logging.warning
debug = logging.debug
info = logging.info


# Absolute path of the directory containing this module.
_ROOT = os.path.abspath(os.path.dirname(__file__))





def check_arguments(args, info_logger, warn_logger, error_logger):
    """Validate single-sample arguments and the input fastq files.

    Args:
        args: parsed argparse namespace supplied by the CLI wrapper.
        info_logger / warn_logger / error_logger: logging callables used for
            progress, warning, and fatal messages.

    Returns:
        The (possibly normalized) argument namespace.
    """
    # Fix: the logger parameters were previously ignored in favor of the
    # module-level globals; pass the supplied callables through as intended.
    # (The visible call site passes the module globals, so its behavior is unchanged.)
    args = _check_arguments(
        args,
        info_logger=info_logger,
        warn_logger=warn_logger,
        error_logger=error_logger,
    )
    _check_file(args.R1)
    _check_file(args.R2)
    read_length = _get_first_read_length(args.R1)
    _check_read_length(args, read_length, warn_logger)
    return args


def main(args):
    """Count guides, optionally with reporter edits and alleles, for one sample.

    Expects an already-parsed argparse namespace (argument parsing is owned by
    the CLI wrapper). Writes the resulting screen to `<output_dir>.h5ad` and
    `<output_dir>.xlsx`.
    """
    # Banner printed at subcommand start.
    print(
        r"""
_ _
/ \ '\ _
| \ \ __ ___ _ _ _ _| |_
\ \ | / _/ _ \ || | ' \ _|
`.__|/ \__\___/\_,_|_||_\__|
"""
    )
    args = check_arguments(args, info_logger=info, warn_logger=warn, error_logger=error)
    args_dict = vars(args)

    # Base-editing parameters used below when building the edit matrix.
    edited_from = args_dict["edited_base"]
    match_target_pos = args_dict["match_target_pos"]

    counter = bean.mp.GuideEditCounter(**args_dict)
    counter.check_filter_fastq()

    counter.get_counts()
    if counter.count_reporter_edits:
        # Drop allele-count rows whose allele serializes to the empty string.
        counter.screen.uns["allele_counts"] = counter.screen.uns["allele_counts"].loc[
            counter.screen.uns["allele_counts"].allele.map(str) != "", :
        ]
        # NOTE(review): this `if` is assumed to be nested under
        # `count_reporter_edits` (source indentation was lost in extraction) —
        # confirm against the upstream repository.
        if match_target_pos:
            base_editing_map = {"A": "G", "C": "T"}
            edited_to = base_editing_map[edited_from]
            counter.screen.get_edit_mat_from_uns(
                edited_from, edited_to, match_target_pos
            )
    counter.screen.write(f"{counter.output_dir}.h5ad")
    counter.screen.to_Excel(f"{counter.output_dir}.xlsx")
    info(f"Output written at:\n {counter.output_dir}.h5ad,\n {counter.output_dir}.xlsx")
    info("All Done!")
    # NOTE(review): the banner is printed again here as at entry — possibly a
    # leftover from the pre-refactor script; confirm whether both are intended.
    print(
        r"""
_ _
/ \ '\ _
| \ \ __ ___ _ _ _ _| |_
\ \ | / _/ _ \ || | ' \ _|
`.__|/ \__\___/\_,_|_||_\__|
"""
    )
73 changes: 18 additions & 55 deletions bin/bean-count-samples → bean/cli/count_samples.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from bean.mapping.utils import (
InputFileError,
_check_arguments,
_get_input_parser,
_get_first_read_length,
_check_read_length,
)
Expand All @@ -32,48 +31,6 @@
info = logging.info


def get_input_parser() -> argparse.ArgumentParser:
    """Add multi-sample specific arguments to the base parser.

    Returns:
        argparse.ArgumentParser: the shared mapping parser extended with the
        sample list, threading, rerun, and per-sample sequence-table options.
        (Fixed: the previous annotation claimed `argparse.Namespace`, but a
        parser — not a parsed namespace — is returned.)
    """
    parser = _get_input_parser()
    parser.add_argument(
        "-i",
        "--input",
        type=str,
        help="List of fastq and sample ids. Formatted as `R1_filepath,R2_filepath,sample_id`",
        required=True,
    )
    parser.add_argument(
        "-t", "--threads", type=int, help="Number of threads", default=10
    )
    # Help strings below were previously concatenated without a separating
    # space ("...to be used.Formatted as...") — fixed.
    parser.add_argument(
        "--guide-start-seqs-file",
        type=str,
        help="CSV file path with per-sample `guide_start_seq` to be used. "
        "Formatted as `sample_id,guide_start_seq`",
        default=None,
    )
    parser.add_argument(
        "--guide-end-seqs-file",
        type=str,
        help="CSV file path with per-sample `guide_end_seq` to be used. "
        "Formatted as `sample_id,guide_end_seq`",
        default=None,
    )
    parser.add_argument(
        "--barcode-start-seqs-file",
        type=str,
        # Fixed copy-paste error: this option's format previously said
        # `sample_id,guide_end_seq`; it documents `barcode_start_seq`.
        help="CSV file path with per-sample `barcode_start_seq` to be used. "
        "Formatted as `sample_id,barcode_start_seq`",
        default=None,
    )

    parser.add_argument(
        "--rerun", help="Recount each sample", action="store_true", default=False
    )

    return parser


def count_sample(R1: str, R2: str, sample_id: str, args: argparse.Namespace):
"""Count single sample given R1 and R2 paths.
Arguments are modified accordingly to the provided sample_id before being passed to GuideEditCounter.
Expand Down Expand Up @@ -102,7 +59,9 @@ def count_sample(R1: str, R2: str, sample_id: str, args: argparse.Namespace):
"barcode_start_seqs_tbl" in args_dict
and args_dict["barcode_start_seqs_tbl"] is not None
):
args_dict["barcode_start_seq"] = str(args_dict["barcode_start_seqs_tbl"][sample_id])
args_dict["barcode_start_seq"] = str(
args_dict["barcode_start_seqs_tbl"][sample_id]
)
counter = bean.mp.GuideEditCounter(**args_dict)
if os.path.exists(f"{counter.output_dir}.h5ad") and not args_dict["rerun"]:
screen = bean.read_h5ad(f"{counter.output_dir}.h5ad")
Expand Down Expand Up @@ -147,10 +106,10 @@ def count_sample(R1: str, R2: str, sample_id: str, args: argparse.Namespace):
def check_arguments(args: argparse.Namespace) -> argparse.Namespace:
"""Checks the validity of the argument."""
args = _check_arguments(args, info, warn, error)
sample_tbl = pd.read_csv(args.input)
sample_tbl = pd.read_csv(args.sample_list)
if len(sample_tbl.iloc[:, 2].unique()) != len(sample_tbl.iloc[:, 2]):
raise InputFileError(
f"Sample ID not unique. Please check your input file {args.input}."
f"Sample ID not unique. Please check your input file {args.sample_list}."
)
first_read_lengths = [
_get_first_read_length(fastq_R1) for fastq_R1 in sample_tbl.iloc[:, 0]
Expand Down Expand Up @@ -187,11 +146,19 @@ def _check_return_guide_seqs_tbl(guide_seqs_file, sample_tbl, label):
return args


def main():
parser = get_input_parser()
args = parser.parse_args()
def main(args):
"""Get the input data"""
print(
r"""
_ _
/ \ '\ _
| \ \ __ ___ _ _ _ _| |_
\ \ | / _/ _ \ || | ' \ _|
`.__|/ \__\___/\_,_|_||_\__|
"""
)
args = check_arguments(args)
sample_tbl = pd.read_csv(args.input) # R1_filepath, R2_filepath, sample_name
sample_tbl = pd.read_csv(args.sample_list) # R1_filepath, R2_filepath, sample_name
sample_tbl_input = sample_tbl.iloc[:, :3]
sample_info_tbl = sample_tbl.iloc[:, 2:].set_index(sample_tbl.columns[2])
with Pool(processes=args.threads, maxtasksperchild=1) as p:
Expand All @@ -205,7 +172,7 @@ def main():
# result = p.starmap(count_sample, sample_tbl[0], sample_tbl[1], sample_tbl[2])

screen = bean.concat(result, axis=1)
database_id = args.name or os.path.basename(args.input).split(".")[0]
database_id = args.name or os.path.basename(args.sample_list).split(".")[0]
output_path = os.path.join(
os.path.abspath(args.output_folder), f"bean_count_{database_id}"
)
Expand All @@ -230,7 +197,3 @@ def main():
`.__|/ \__\___/\_,_|_||_\__|
"""
)


if __name__ == "__main__":
main()
20 changes: 12 additions & 8 deletions bin/bean-create-screen → bean/cli/create_screen.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
import sys
import logging
from bean.framework.read_from_csvs import get_input_parser, create_screen
from bean.framework.read_from_csvs import create_screen

logging.basicConfig(
level=logging.INFO,
Expand All @@ -19,15 +19,19 @@
info = logging.info


def main():
parser = get_input_parser()
args = parser.parse_args()
def main(args):
    """Create a screen object from the input CSV tables and write it to `.h5ad`.

    Expects an already-parsed argparse namespace (argument parsing is owned by
    the CLI wrapper). (Fixed: the previous docstring was copy-pasted from a
    parser-building function and did not describe this entry point.)
    """
    # Banner for the `create-screen` subcommand.
    print(
        r"""
_ _
/ \ '\ _
| \ \ __ _ _ ___ __ _| |_ ___
\ \ | / _| '_/ -_) _` | _/ -_)
`.__|/ \__|_| \___\__,_|\__\___|
"""
    )
    screen = create_screen(args)
    info(f"Done obtaining screen:\n{screen}\nWriting result...")
    # Output name: `--output-prefix` if given, else the gRNA counts table's
    # basename, with an `.h5ad` extension.
    output_path = f"{args.output_prefix if args.output_prefix else os.path.splitext(args.gRNA_counts_table_csv)[0]}.h5ad"
    screen.write(output_path)
    info(f"Done writing screen object to {output_path}.")


if __name__ == "__main__":
main()
Loading

0 comments on commit d7eaa05

Please sign in to comment.