-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathname_the_proteins.py
66 lines (54 loc) · 1.44 KB
/
name_the_proteins.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python3
"""
Author: Gustavo Tamasco
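Script to run: name_the_proteins.py
The script takes: two FASTA files. Header lines of the first file are read as
">label:name"; the second file holds the protein sequences under ">label"
headers. Sequences whose label appears in both files are written under their
name to pantoea_rast.faa.
Run the code by: name_the_proteins.py <reference_fasta_file> <related_fasta_file>
"""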
# import statements
from sys import argv
import os.path
import subprocess
# functions and classes
def parse_names(fasta):
    """Map each FASTA header label to the name that follows it.

    Header lines (those starting with ">") are split on ":". The text
    before the first ":" (including the leading ">") becomes the key and
    the field between the first and second ":" becomes the value. Lines
    that are not headers are ignored.

    Args:
        fasta: Iterable of text lines, e.g. an open FASTA file.

    Returns:
        dict mapping label to name. If a label occurs more than once, the
        last occurrence wins.
    """
    all_names = {}
    for line in fasta:
        if not line.startswith(">"):
            continue
        # Strip and split once instead of once per extracted field.
        fields = line.strip().split(":")
        if len(fields) < 2:
            # A header without ":" carries no name; skip it instead of
            # aborting the whole run with an IndexError.
            continue
        all_names[fields[0]] = fields[1]
    return all_names
def parse_prot(file):
    """Collect the sequences of a FASTA file keyed by their header line.

    Each line starting with ">" opens a new record whose key is the whole
    stripped header line (leading ">" included). Every following line is
    stripped and appended to that record until the next header.

    Args:
        file: Iterable of text lines, e.g. an open FASTA file.

    Returns:
        dict mapping header line to the concatenated sequence string. A
        header that appears again restarts its sequence from empty.
    """
    chunks = {}
    label = None
    for line in file:
        if line.startswith(">"):
            label = line.strip()
            chunks[label] = []
        elif label is not None:
            chunks[label].append(line.strip())
        # Lines seen before the first header (such as leading blank lines)
        # belong to no record and are ignored rather than raising.
    # Join once per record instead of growing a string line by line.
    return {header: "".join(parts) for header, parts in chunks.items()}
def give_names(names, prot_dic):
    """Re-key sequences by name for every label present in both mappings.

    Args:
        names: dict mapping label to name.
        prot_dic: dict mapping label to sequence.

    Returns:
        dict mapping name to sequence, in the iteration order of `names`.
        Labels missing from `prot_dic` are dropped. When several labels
        share one name, the sequence of the last such label is kept.
    """
    return {
        name: prot_dic[label]
        for label, name in names.items()
        if label in prot_dic
    }
def write_out(info, out_path="pantoea_rast.faa"):
    """Write name/sequence pairs to a file as FASTA records.

    Each entry is written as a ">name" header line followed by the sequence
    on a single line.

    Args:
        info: dict mapping record name to sequence string.
        out_path: Path of the file to create or overwrite. Defaults to
            "pantoea_rast.faa" in the current working directory.
    """
    # The context manager closes the file even if a write fails part-way.
    with open(out_path, "w") as out:
        for name, sequence in info.items():
            out.write(">{0}\n{1}\n".format(name, sequence))
def main():
    """Read both input files, rename the proteins and write the result.

    Takes two command-line arguments: the file whose headers supply the
    label-to-name mapping, then the file that supplies the sequences.

    Raises:
        SystemExit: with a usage message when fewer than two file paths are
            given on the command line.
    """
    if len(argv) < 3:
        # Fail with a readable usage line instead of a bare IndexError.
        prog = argv[0] if argv else "name_the_proteins.py"
        raise SystemExit(
            "Usage: {0} <reference_fasta_file> <related_fasta_file>".format(prog)
        )
    with open(argv[1]) as fasta_file:
        names = parse_names(fasta_file)
    with open(argv[2]) as prot_file:
        prot_dic = parse_prot(prot_file)
    named_prot = give_names(names, prot_dic)
    write_out(named_prot)
# main
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()