-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathvista.py
73 lines (54 loc) · 3.01 KB
/
vista.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import argparse
import subprocess
#AUTHOR: SEUNGMO LEE
def filter_vcfs_by_name(inputs, names):
filtered_vcfs = []
for name in names:
matching_vcfs = [vcf for vcf in inputs if name in vcf]
if not matching_vcfs:
raise ValueError(f"No VCF file found with '{name}' in the name.")
filtered_vcfs.extend(matching_vcfs)
return filtered_vcfs
def merge_vcfs(inputs, sample, output_folder):
if sample == "mouse" or sample == "human":
script = "scripts/vista_merge.py"
else:
raise ValueError("Invalid sample type. Use 'mouse' or 'human'.")
cmd = ["python", script, f"{output_folder}/VISTA.vcf"] + inputs
subprocess.run(cmd, check=True)
def analyze_vcfs(gold_ref, threshold, output_folder):
output_file = f"{output_folder}/modified_VISTA_{threshold}.vcf"
compare_cmd = ["python", "scripts/compare_script.py", str(threshold), f"{output_folder}/VISTA.vcf" , gold_ref, output_file]
subprocess.run(compare_cmd, check=True)
print(f"Comparison for {threshold} completed. Output file: {output_file}")
record = f"{output_folder}/summary.txt"
summarize_cmd = ["python", "scripts/summarize2.py", output_file, gold_ref, record]
subprocess.run(summarize_cmd, check=True)
print(f"Summarization for {threshold} completed. Summary output file: {record}")
def vista(args):
if len(args.inputs) < 4:
raise ValueError("At least 4 VCF files are required for -inputs.")
if args.sample == "human":
vcf_names_to_filter = ["octopus", "manta", "delly", "genomestrip"]
filtered_vcfs = filter_vcfs_by_name(args.inputs, vcf_names_to_filter)
else:
vcf_names_to_filter = ["lumpy", "manta", "clever", "popdel"]
filtered_vcfs = filter_vcfs_by_name(args.inputs, vcf_names_to_filter)
merge_vcfs(filtered_vcfs, args.sample, args.output)
if args.analysis:
if not args.gold or not args.threshold:
raise ValueError("Missing Gold Standard or threshold number!")
analyze_vcfs(args.gold, args.threshold, args.output)
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Run VISTA \n Command: python vista.py -i [MANTA VCF] [LUMPY VCF] [DELLY VCF] [GENOMESTRIP VCF] [CLEVER VCF] [POPDEL VCF] [OCTOPUS VCF] -s [mouse or human] -o [output folder]\n Note: Input files' tool names should be all in lowercase",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument("-i", "--inputs", nargs="+", required=True, help="Input VCF files")
parser.add_argument("-s", "--sample", required=True, choices=["mouse", "human"], help="Sample type")
parser.add_argument("-o", "--output", required=True, help="Output folder path")
parser.add_argument("-g", "--gold", help="Input gold standard VCF")
parser.add_argument("-a", "--analysis", action="store_true", help="Include statistics analysis")
parser.add_argument("-t", "--threshold", help="Threshold for comparison")
args = parser.parse_args()
vista(args)