Skip to content

Commit

Permalink
Add diploid reference scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
jonassibbesen committed Jun 24, 2022
1 parent 35b2e77 commit e14771e
Show file tree
Hide file tree
Showing 2 changed files with 144 additions and 0 deletions.
116 changes: 116 additions & 0 deletions python/choose_mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 27 10:19:54 2022
@author: Jordan
"""

import sys
import random

SECONDARY = 256
SUPPLEMENTARY = 2048

def to_str(s):
if type(s) == bytes:
return s.decode("utf-8")
else:
return s

# print the merged headers and
def process_headers(f1, f2):

# lines above the SQ lines
line1 = to_str(next(f1))
while not line1.startswith("@SQ"):
print(line1, end = "")
line1 = to_str(next(f1))
line2 = to_str(next(f2))
while not line2.startswith("@SQ"):
line2 = to_str(next(f2))

# SQ lines
while line1.startswith("@SQ"):
print(line1, end = "")
line1 = to_str(next(f1))
while line2.startswith("@SQ"):
if not "KI" in line2 and not "GL" in line2:
print(line2, end = "")
line2 = to_str(next(f2))

# lines after SQ lines
while line1 is not None and line1.startswith("@"):
print(line1, end = "")
line1 = to_str(next(f1))

while line2 is not None and line2.startswith("@"):
line2 = to_str(next(f2))

return line1.strip().split("\t"), line2.strip().split("\t")


# get next sam record, skipping secondary and supplementary alignments
def get_next(f):
try:
r = next(f)
except StopIteration:
return None
else:
r = to_str(r)
tokens = r.strip().split("\t")
flags = int(tokens[1])
while flags & SECONDARY or flags & SUPPLEMENTARY:
try:
r = next(f)
except StopIteration:
return None
r = to_str(r)
tokens = r.strip().split("\t")
flags = int(tokens[1])

return tokens

def score(tokens):
s = 0
if len(tokens) >= 12:
for tag in tokens[11:]:
if tag.startswith("AS"):
s = int(tag.split(":")[2])
break
return s

if __name__ == "__main__":

sam1 = sys.argv[1]
sam2 = sys.argv[2]

f1 = open(sam1)
f2 = open(sam2)

random.seed(982316498213)

tokens1, tokens2 = process_headers(f1, f2)

while tokens1 is not None and tokens2 is not None:

# qnames should match
assert(tokens1[0] == tokens2[0])

score1 = score(tokens1)
score2 = score(tokens2)

emit = None
if score1 > score2:
emit = tokens1
elif score1 < score2:
emit = tokens2
else:
emit = random.choice([tokens1, tokens2])

print("\t".join(emit))

tokens1 = get_next(f1)
tokens2 = get_next(f2)


28 changes: 28 additions & 0 deletions python/fix_cross_map_flags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 15 12:58:38 2022
@author: Jordan
"""

import sys


if __name__ == "__main__":


primary_mask = ~256

for line in sys.stdin:
if type(line) == bytes:
line = line.decode("utf-8")
if line.startswith("@"):
print(line.strip())
continue

tokens = line.strip().split("\t")

tokens[1] = str(int(tokens[1]) & primary_mask)

print("\t".join(tokens))

0 comments on commit e14771e

Please sign in to comment.