split_espresso_s_output_for_c.py
import argparse
import os
import os.path
import shutil
OTHER_CHR_NAME = 'other'
def parse_args():
parser = argparse.ArgumentParser(
description=('Split the ESPRESSO_S output to efficiently distribute'
' reads among ESPRESSO_C jobs'))
parser.add_argument('--orig-work-dir',
required=True,
help='the output directory of ESPRESSO_S')
parser.add_argument(
'--new-base-dir',
required=True,
        help='a directory to write the newly split input for ESPRESSO_C')
parser.add_argument(
'--target-reads-per-c',
type=int,
required=True,
help='how many reads should be allocated to a single ESPRESSO_C job')
parser.add_argument('--num-threads-per-c',
type=int,
default=1,
                        help='how many threads each ESPRESSO_C job will have')
parser.add_argument('--genome-fasta',
required=True,
help='the .fa file to use as input to ESPRESSO')
return parser.parse_args()
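# Example invocation (paths and values are illustrative, not from the
# original source):
#   python split_espresso_s_output_for_c.py \
#     --orig-work-dir /path/to/espresso_s_work_dir \
#     --new-base-dir /path/to/new_split_dir \
#     --target-reads-per-c 100000 \
#     --num-threads-per-c 4 \
#     --genome-fasta /path/to/genome.fa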
def chr_name_from_chr_groups(chr_name, chr_groups):
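    """Map a chr name to the name used for its SJ_simplified.list.

    Chrs that did not get their own {chr}_SJ_simplified.list from
    ESPRESSO_S are grouped together under 'other'.
    """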
if chr_name in chr_groups:
return chr_name
return OTHER_CHR_NAME
def sam_list_path_from_c_dir_path(path):
return os.path.join(path, 'sam.list3')
def sj_group_all_from_work_dir(path):
return os.path.join(path, 'SJ_group_all.fa')
def sj_simplified_from_work_dir_and_chr(path, chr_name, chr_groups):
adjusted_chr_name = chr_name_from_chr_groups(chr_name, chr_groups)
sj_simplified_name = '{}_SJ_simplified.list'.format(adjusted_chr_name)
return os.path.join(path, sj_simplified_name)
# ESPRESSO_Q just needs the samples.tsv to have at least
# 1 entry for each c dir and 1 entry for each sample.
def add_new_samples_entries(new_c_dir_i, samples, new_samples_handle):
for sample in samples:
fake_sam_path = '{}_{}.sam'.format(new_c_dir_i, sample)
new_samples_handle.write('{}\t{}\t{}\n'.format(fake_sam_path, sample,
new_c_dir_i))
def split_files_to_new_c_dirs(sample_by_c_dir, new_base_dir, new_samples_path,
target_reads_per_c, num_threads_per_c,
chr_groups, fasta_dir, fasta_index, genome_fasta,
sorted_copy_dir, indices_by_path):
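    """Distribute reads from the original C dirs into new C dirs.

    Works through (chr, group) pairs in order of decreasing read count,
    appending each group's SAM, SJ, and FASTA data to new C dirs until a
    dir reaches target_reads_per_c. Also writes the new samples file and
    returns the paths of the new C sub dirs.
    """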
new_c_dir_details = {'all_dirs': list(), 'partial_dirs': list()}
target_per_thread = target_reads_per_c // num_threads_per_c
orig_sj_group_path = sj_group_all_from_work_dir(sorted_copy_dir)
sj_group_index = indices_by_path[orig_sj_group_path]
read_count_details = get_read_counts_by_chr_by_group(
sample_by_c_dir, sorted_copy_dir, indices_by_path)
read_counts_by_chr_by_group = (
read_count_details['read_counts_by_chr_by_group'])
c_dirs_by_group = read_count_details['c_dirs_by_group']
sorted_chrs, sorted_groups_by_chr = sort_chrs_and_groups(
read_counts_by_chr_by_group)
for chr_name in sorted_chrs:
for group in sorted_groups_by_chr[chr_name]:
c_dirs = c_dirs_by_group[group]
split_files_to_new_c_dirs_for_group(
chr_name, group, c_dirs, new_base_dir, target_reads_per_c,
target_per_thread, chr_groups, fasta_dir, fasta_index,
genome_fasta, sorted_copy_dir, indices_by_path,
new_c_dir_details, orig_sj_group_path, sj_group_index,
sample_by_c_dir)
cleanup_partials_after_group(new_c_dir_details, target_reads_per_c)
with open(new_samples_path, 'wt') as new_samples_handle:
for details in new_c_dir_details['all_dirs']:
add_new_samples_entries(details['dir_i'], details['samples'],
new_samples_handle)
new_c_sub_dir_paths = list()
for details in new_c_dir_details['all_dirs']:
new_c_sub_dir_paths.append(details['sub_dir_path'])
return new_c_sub_dir_paths
def cleanup_partials_after_group(new_c_dir_details, target_reads_per_c):
new_partial_dirs = list()
for partial in new_c_dir_details['partial_dirs']:
if partial['read_count'] < target_reads_per_c:
new_partial_dirs.append(partial)
new_c_dir_details['partial_dirs'] = new_partial_dirs
def get_available_new_c_dir(new_c_dir_details, num_partials_out_for_group,
new_base_dir):
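    """Return the next partially filled new C dir, creating one if needed.

    num_partials_out_for_group counts the partial dirs that have already
    taken their share of the current group, so those dirs are skipped
    rather than given more of the same group's reads.
    """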
partial_dirs = new_c_dir_details['partial_dirs']
if not partial_dirs or (len(partial_dirs) <= num_partials_out_for_group):
new_i = len(new_c_dir_details['all_dirs'])
new_dir_path = os.path.join(new_base_dir, str(new_i))
new_sub_dir_path = os.path.join(new_dir_path, '0')
new_dir = {
'dir_i': new_i,
'chrs': list(),
'read_count': 0,
'read_count_by_group': dict(),
'dir_path': new_dir_path,
'sub_dir_path': new_sub_dir_path,
'samples': set(),
}
new_c_dir_details['all_dirs'].append(new_dir)
partial_dirs.append(new_dir)
os.makedirs(new_sub_dir_path)
return new_dir
return partial_dirs[num_partials_out_for_group]
def split_files_to_new_c_dirs_for_group(
chr_name, group, c_dirs, new_base_dir, target_reads_per_c,
target_per_thread, chr_groups, fasta_dir, fasta_index, genome_fasta,
sorted_copy_dir, indices_by_path, new_c_dir_details,
orig_sj_group_path, sj_group_index, sample_by_c_dir):
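    """Copy one (chr, group)'s reads from the original C dirs to new C dirs.

    SAM lines are appended to the current new C dir until it reaches its
    total read target or its per-thread quota for this group, at which
    point the writing moves on to another dir.
    """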
num_partials_out_for_group = 0
current_new_c_dir_details = None
for c_dir in c_dirs:
sample = sample_by_c_dir[c_dir]
c_dir_path = os.path.join(sorted_copy_dir, c_dir)
orig_sam_list_path = sam_list_path_from_c_dir_path(c_dir_path)
orig_sj_simplified_path = sj_simplified_from_work_dir_and_chr(
sorted_copy_dir, chr_name, chr_groups)
sam_index = indices_by_path[orig_sam_list_path]
sj_simplified_index = indices_by_path[orig_sj_simplified_path]
remaining_reads_in_c = sam_index[group]['line_count']
with open(orig_sam_list_path, 'rt') as in_sam_handle:
in_sam_handle.seek(sam_index[group]['start'])
while remaining_reads_in_c:
if current_new_c_dir_details is None:
current_new_c_dir_details = get_available_new_c_dir(
new_c_dir_details, num_partials_out_for_group,
new_base_dir)
append_sj_group_lines_for_group(group, orig_sj_group_path,
sj_group_index,
current_new_c_dir_details)
append_sj_simplified_lines_for_group(
group, chr_name, chr_groups, orig_sj_simplified_path,
sj_simplified_index, current_new_c_dir_details)
if chr_name not in current_new_c_dir_details['chrs']:
current_new_c_dir_details['chrs'].append(chr_name)
append_fasta_lines_for_chr(chr_name, genome_fasta,
fasta_index,
current_new_c_dir_details,
fasta_dir)
remaining_to_target = (target_reads_per_c -
current_new_c_dir_details['read_count'])
new_count_by_group = (
current_new_c_dir_details['read_count_by_group'])
count_by_group_value = new_count_by_group.get(group, 0)
remaining_per_thread = target_per_thread - count_by_group_value
remaining_for_new = min(remaining_to_target,
remaining_per_thread)
count_to_write = min(remaining_reads_in_c, remaining_for_new)
append_sam_lines_for_group(count_to_write, in_sam_handle,
current_new_c_dir_details, sample)
remaining_reads_in_c -= count_to_write
current_new_c_dir_details['read_count'] += count_to_write
new_count_by_group[group] = (count_by_group_value +
count_to_write)
                # Move on to a new c dir once this dir's quota for the
                # group is used up (count_to_write filled remaining_for_new).
                # Comparing against the already decremented
                # remaining_reads_in_c here could loop forever once
                # remaining_for_new reached 0 with reads still unassigned.
                if remaining_for_new <= count_to_write:
                    current_new_c_dir_details = None
                    num_partials_out_for_group += 1
def append_sj_group_lines_for_group(group, orig_sj_group_path, sj_group_index,
current_new_c_dir_details):
new_dir = current_new_c_dir_details['dir_path']
new_sj_group_path = sj_group_all_from_work_dir(new_dir)
# A read group may not have any lines in SJ_group_all.
# Still open the file to create an empty file.
group_index = sj_group_index.get(group)
with open(new_sj_group_path, 'at') as out_handle:
if group_index is None:
return
with open(orig_sj_group_path, 'rt') as in_handle:
in_handle.seek(group_index['start'])
for line_i in range(group_index['line_count']):
line = in_handle.readline()
out_handle.write(line)
def append_sj_simplified_lines_for_group(group, chr_name, chr_groups,
orig_sj_simplified_path,
sj_simplified_index,
current_new_c_dir_details):
new_dir = current_new_c_dir_details['dir_path']
new_sj_simplified_path = sj_simplified_from_work_dir_and_chr(
new_dir, chr_name, chr_groups)
# A read group may not have any lines in its SJ_simplified.list.
# Still open the file to create an empty file.
group_index = sj_simplified_index.get(group)
with open(new_sj_simplified_path, 'at') as out_handle:
if group_index is None:
return
with open(orig_sj_simplified_path, 'rt') as in_handle:
in_handle.seek(group_index['start'])
for line_i in range(group_index['line_count']):
line = in_handle.readline()
out_handle.write(line)
def append_fasta_lines_for_chr(chr_name, genome_fasta, fasta_index,
current_new_c_dir_details, fasta_dir):
dir_i = current_new_c_dir_details['dir_i']
new_fasta_path = os.path.join(fasta_dir, '{}.fa'.format(dir_i))
chr_index = fasta_index[chr_name]
with open(new_fasta_path, 'at') as out_handle:
with open(genome_fasta, 'rt') as in_handle:
in_handle.seek(chr_index['start'])
for line_i in range(chr_index['line_count']):
line = in_handle.readline()
out_handle.write(line)
def append_sam_lines_for_group(count_to_write, in_sam_handle,
current_new_c_dir_details, sample):
current_new_c_dir_details['samples'].add(sample)
sub_dir_path = current_new_c_dir_details['sub_dir_path']
new_sam_path = sam_list_path_from_c_dir_path(sub_dir_path)
with open(new_sam_path, 'at') as out_sam_handle:
for line_i in range(count_to_write):
line = in_sam_handle.readline()
out_sam_handle.write(line)
def sort_chrs_and_groups(read_counts_by_chr_by_group):
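    """Order chrs and groups by read count, largest first.

    Groups within each chr are sorted by descending read count, and chrs
    are sorted by the read count of their largest group.
    """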
unsorted_chrs_and_counts = list()
sorted_groups_by_chr = dict()
for chr_name, counts_by_group in read_counts_by_chr_by_group.items():
unsorted_groups_and_counts = list()
for group, count in counts_by_group.items():
unsorted_groups_and_counts.append((group, count))
sorted_groups_and_counts = sorted(unsorted_groups_and_counts,
key=lambda pair: pair[1],
reverse=True)
largest_count = sorted_groups_and_counts[0][1]
unsorted_chrs_and_counts.append((chr_name, largest_count))
sorted_groups_by_chr[chr_name] = [
pair[0] for pair in sorted_groups_and_counts
]
sorted_chrs_and_counts = sorted(unsorted_chrs_and_counts,
key=lambda pair: pair[1],
reverse=True)
sorted_chrs = [pair[0] for pair in sorted_chrs_and_counts]
return sorted_chrs, sorted_groups_by_chr
def get_read_counts_by_chr_by_group(sample_by_c_dir, sorted_copy_dir,
indices_by_path):
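    """Total the sam.list3 line counts per chr per group across all C dirs.

    Also records which original C dirs contain each group (sorted
    numerically by dir name).
    """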
read_counts_by_chr_by_group = dict()
c_dirs_by_group = dict()
for c_dir_name in sample_by_c_dir.keys():
c_dir_path = os.path.join(sorted_copy_dir, c_dir_name)
sam_list_path = sam_list_path_from_c_dir_path(c_dir_path)
index = indices_by_path[sam_list_path]
for group, values in index.items():
c_dirs = c_dirs_by_group.get(group)
if not c_dirs:
c_dirs = list()
c_dirs_by_group[group] = c_dirs
c_dirs.append(c_dir_name)
chr_name = values['chr']
line_count = values['line_count']
read_counts_by_group = read_counts_by_chr_by_group.get(chr_name)
if not read_counts_by_group:
read_counts_by_group = dict()
read_counts_by_chr_by_group[chr_name] = read_counts_by_group
old_count = read_counts_by_group.get(group, 0)
read_counts_by_group[group] = old_count + line_count
for c_dirs in c_dirs_by_group.values():
c_dirs.sort(key=int)
return {
'read_counts_by_chr_by_group': read_counts_by_chr_by_group,
'c_dirs_by_group': c_dirs_by_group
}
def copy_sj_list_lines(new_c_sub_dirs, orig_work_dir, sample_by_c_dir):
    # Write the lines from the original sj.list files to the new_c_sub_dirs.
    # The distribution among dirs doesn't matter because ESPRESSO_Q will
    # combine them. Each new dir needs at least 1 line to avoid a warning,
    # so copy one original sj.list into each new dir (starting from the
    # last) and write whatever remains to the first dir.
new_sj_list_handle = None
new_c_sub_dir_i = len(new_c_sub_dirs) - 1
new_c_sub_dir = new_c_sub_dirs[new_c_sub_dir_i]
try:
new_sj_list_handle = open(os.path.join(new_c_sub_dir, 'sj.list'), 'wt')
for c_dir in sample_by_c_dir.keys():
sj_list_path = os.path.join(orig_work_dir, c_dir, 'sj.list')
with open(sj_list_path, 'rt') as in_handle:
for line in in_handle:
new_sj_list_handle.write(line)
if new_c_sub_dir_i != 0:
new_c_sub_dir_i -= 1
new_c_sub_dir = new_c_sub_dirs[new_c_sub_dir_i]
new_sj_list_handle.close()
new_sj_list_handle = open(
os.path.join(new_c_sub_dir, 'sj.list'), 'wt')
finally:
if new_sj_list_handle:
new_sj_list_handle.close()
def parse_orig_samples(samples_path):
sample_by_c_dir = dict()
with open(samples_path, 'rt') as handle:
for line in handle:
columns = line.strip().split('\t')
sam, sample, c_dir_num = columns
old_sample_name = sample_by_c_dir.get(c_dir_num)
if old_sample_name is not None and old_sample_name != sample:
raise Exception(
'c dir has multiple sample names: {} {} {} {}'.format(
c_dir_num, sam, sample, old_sample_name))
sample_by_c_dir[c_dir_num] = sample
return sample_by_c_dir
def sort_by_chr_read_orig(orig_path, line_to_chr_and_group, length_by_group,
temp_sorting_dir, temp_files_by_group_number):
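    """First pass of sort_by_chr: stream orig_path into per-chr temp files.

    Records each group's byte length and line count in length_by_group and
    maps the first group of each chr to that chr's temp file. This relies
    on each chr's lines being contiguous in orig_path, since revisiting a
    chr would reopen (and truncate) its temp file.
    """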
prev_chr = None
temp_chr_handle = None
group_start_offset = None
prev_group = None
group_line_count = None
try:
with open(orig_path, 'rt') as orig_handle:
while True:
offset = orig_handle.tell()
line = orig_handle.readline()
if not line:
if prev_group is not None:
group_length = offset - group_start_offset
length_by_group[prev_group] = [
group_length, group_line_count
]
break
chr_name, group = line_to_chr_and_group(line)
if chr_name is None:
# line goes with previous
pass
elif group != prev_group:
if prev_group is not None:
group_length = offset - group_start_offset
length_by_group[prev_group] = [
group_length, group_line_count
]
group_start_offset = offset
group_line_count = 0
prev_group = group
if chr_name != prev_chr:
prev_chr = chr_name
if temp_chr_handle:
temp_chr_handle.close()
temp_chr_path = os.path.join(temp_sorting_dir,
chr_name)
temp_files_by_group_number[group] = temp_chr_path
temp_chr_handle = open(temp_chr_path, 'wt')
temp_chr_handle.write(line)
group_line_count += 1
finally:
if temp_chr_handle:
temp_chr_handle.close()
def sort_by_chr(orig_path, new_path, line_to_chr_and_group, temp_sorting_dir):
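    """Copy orig_path to new_path with each chr's lines kept contiguous.

    Chr blocks are written in order of their first group number. Returns
    an index mapping each group to its byte offsets, line count, and chr
    in the sorted copy, so later steps can seek directly to a group.
    """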
index = dict()
length_by_group = dict()
temp_files_by_group_number = dict()
sort_by_chr_read_orig(orig_path, line_to_chr_and_group, length_by_group,
temp_sorting_dir, temp_files_by_group_number)
sorted_group_numbers = sorted(list(temp_files_by_group_number.keys()))
with open(new_path, 'wt') as out_handle:
for group_number in sorted_group_numbers:
temp_file_path = temp_files_by_group_number[group_number]
with open(temp_file_path, 'rt') as in_handle:
while True:
line = in_handle.readline()
if not line:
break
chr_name, group = line_to_chr_and_group(line)
length, line_count = length_by_group[group]
start_offset = out_handle.tell()
group_end_offset = start_offset + length
index[group] = {
'start': start_offset,
'end': group_end_offset,
'line_count': line_count,
'chr': chr_name,
}
out_handle.write(line)
for line_i in range(line_count - 1):
line = in_handle.readline()
out_handle.write(line)
os.remove(temp_file_path)
return index
def sj_group_all_line_to_chr_and_group(line):
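    """Parse the chr name and group number from an SJ_group_all.fa header.

    As an illustration (the exact field layout is determined by
    ESPRESSO_S), a header like '>chr1:100:5:+' would yield ('chr1', 5).
    Non-header lines return (None, None).
    """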
if not line.startswith('>'):
return None, None
colon_split = line.split(':')
group_str = colon_split[-2]
group = int(group_str)
chr_name = colon_split[0][1:]
return chr_name, group
def sam_list_line_to_chr_and_group(line):
columns = line.strip().split('\t')
group = int(columns[0])
chr_name = columns[6]
return chr_name, group
def sj_simplified_list_line_to_chr_and_group(line):
if not line.startswith('SJ_cluster'):
return None, None
columns = line.strip().split('\t')
group = int(columns[1])
chr_name = columns[4]
return chr_name, group
def copy_sort_and_index_some_orig_files(orig_work_dir, new_base_dir):
result = dict()
sorted_copy_dir = os.path.join(new_base_dir, 'sorted_copies')
os.makedirs(sorted_copy_dir)
result['directory'] = sorted_copy_dir
indices_by_path = dict()
result['indices_by_path'] = indices_by_path
temp_sorting_dir = os.path.join(sorted_copy_dir, 'temp_sorting_dir')
os.makedirs(temp_sorting_dir)
orig_sj_group_path = sj_group_all_from_work_dir(orig_work_dir)
sorted_sj_group_path = sj_group_all_from_work_dir(sorted_copy_dir)
sj_group_index = sort_by_chr(orig_sj_group_path, sorted_sj_group_path,
sj_group_all_line_to_chr_and_group,
temp_sorting_dir)
indices_by_path[sorted_sj_group_path] = sj_group_index
orig_c_dirs = list()
orig_sj_simplified_lists = list()
orig_file_names = os.listdir(orig_work_dir)
for file_name in orig_file_names:
path = os.path.join(orig_work_dir, file_name)
if os.path.isdir(path) and file_name.isdigit():
orig_c_dirs.append(file_name)
elif file_name.endswith('SJ_simplified.list'):
orig_sj_simplified_lists.append(file_name)
for orig_c_dir_name in orig_c_dirs:
orig_c_dir_path = os.path.join(orig_work_dir, orig_c_dir_name)
new_c_dir_path = os.path.join(sorted_copy_dir, orig_c_dir_name)
os.makedirs(new_c_dir_path)
orig_sam_list_path = sam_list_path_from_c_dir_path(orig_c_dir_path)
new_sam_list_path = sam_list_path_from_c_dir_path(new_c_dir_path)
sam_list_index = sort_by_chr(orig_sam_list_path, new_sam_list_path,
sam_list_line_to_chr_and_group,
temp_sorting_dir)
indices_by_path[new_sam_list_path] = sam_list_index
for sj_list_name in orig_sj_simplified_lists:
orig_path = os.path.join(orig_work_dir, sj_list_name)
new_path = os.path.join(sorted_copy_dir, sj_list_name)
sj_list_index = sort_by_chr(orig_path, new_path,
sj_simplified_list_line_to_chr_and_group,
temp_sorting_dir)
indices_by_path[new_path] = sj_list_index
return result
def index_fasta(fasta):
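    """Index each sequence in the FASTA by byte offset and line count.

    line_count includes the '>' header line, so copying line_count lines
    starting from 'start' reproduces the whole record.
    """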
index = dict()
chr_name = None
line_count = 0
with open(fasta, 'rt') as handle:
while True:
offset = handle.tell()
line = handle.readline()
if not line:
if chr_name is not None:
index[chr_name]['line_count'] = line_count
break
if line.startswith('>'):
if chr_name is not None:
index[chr_name]['line_count'] = line_count
line_count = 0
chr_name = line[1:].split()[0]
if chr_name in index:
raise Exception('found {} multiple times in {}'.format(
chr_name, fasta))
index[chr_name] = {
'start': offset,
'line_count': 0,
}
line_count += 1
return index
def get_chr_groups_from_s_output(orig_work_dir):
chr_groups = list()
file_names = os.listdir(orig_work_dir)
expected_suffix = '_SJ_simplified.list'
for file_name in file_names:
if not file_name.endswith(expected_suffix):
continue
chr_name = file_name[:-len(expected_suffix)]
chr_groups.append(chr_name)
return chr_groups
def create_output_dir(dir_path):
if os.path.exists(dir_path):
if not os.path.isdir(dir_path):
raise Exception('new output dir already exists and is a file')
if os.listdir(dir_path):
raise Exception('new output dir already exists and is not empty')
return
os.makedirs(dir_path)
def split_espresso_s_output_for_c(orig_work_dir, new_base_dir,
target_reads_per_c, num_threads_per_c,
genome_fasta):
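    """Re-split ESPRESSO_S output into C dirs of about target_reads_per_c.

    Indexes the genome FASTA, makes sorted and indexed copies of
    SJ_group_all.fa, each sam.list3, and each SJ_simplified.list, then
    distributes reads to new C dirs, copies the sj.list lines, and
    removes the temporary sorted copies.
    """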
create_output_dir(new_base_dir)
chr_groups = get_chr_groups_from_s_output(orig_work_dir)
fasta_dir = os.path.join(new_base_dir, 'fastas')
os.makedirs(fasta_dir)
fasta_index = index_fasta(genome_fasta)
copied_details = copy_sort_and_index_some_orig_files(
orig_work_dir, new_base_dir)
sorted_copy_dir = copied_details['directory']
indices_by_path = copied_details['indices_by_path']
orig_samples_path = os.path.join(orig_work_dir, 'samples.tsv.updated')
new_samples_path = os.path.join(new_base_dir, 'samples.tsv.updated')
sample_by_c_dir = parse_orig_samples(orig_samples_path)
temp_bam_to_sam_dir = os.path.join(sorted_copy_dir, 'temp_bam_to_sam')
os.makedirs(temp_bam_to_sam_dir)
new_c_sub_dirs = split_files_to_new_c_dirs(
sample_by_c_dir, new_base_dir, new_samples_path, target_reads_per_c,
num_threads_per_c, chr_groups, fasta_dir, fasta_index, genome_fasta,
sorted_copy_dir, indices_by_path)
copy_sj_list_lines(new_c_sub_dirs, orig_work_dir, sample_by_c_dir)
shutil.rmtree(sorted_copy_dir)
def main():
args = parse_args()
split_espresso_s_output_for_c(args.orig_work_dir, args.new_base_dir,
args.target_reads_per_c,
args.num_threads_per_c, args.genome_fasta)
if __name__ == '__main__':
main()