# File: 3.feature2target.py
##*************************************************************************##
## Step3. pre-processing the data to create a feature-to-target table ##
##*************************************************************************##
import pandas as pd
import sys
# Positional slice of the SNP-count table columns that are discarded before the
# table is transposed (columns 1..5, i.e. everything between 'locus_tag' and
# the per-isolate columns). This is hard-coded by position; the original
# author's name-based alternative listed them as
#   'gene_name', 'start_site', 'end_site', 'gene_length', 'GCF_013371725'
# NOTE(review): presumably gene annotation plus one reference-genome column --
# confirm against your own input before reusing this on a different layout.
_DROPPED_COLUMNS = slice(1, 6)

_USAGE = "Usage:python3 feature2target.py <vcf_snp_count.txt> <antibiotics table> <outfile> \n\n e.g., python3 feature2target.py vcf_snp_count.txt Antibiotics.txt feature2target.txt "


def build_feature2target(snp_counts, antibiotics):
    """Combine a per-gene SNP-count table with an antibiotics table.

    Args:
        snp_counts: DataFrame with a 'locus_tag' column, the columns selected
            by ``_DROPPED_COLUMNS`` (which are discarded), and then one column
            per isolate.
        antibiotics: DataFrame with an 'isolate' column whose values are
            matched against the isolate column names of ``snp_counts``.

    Returns:
        A DataFrame with one row per isolate column of ``snp_counts``. Its
        columns are the antibiotics columns (without 'isolate'), then
        'locus_tag' (which, after the transpose, holds the isolate name), then
        one column per locus tag.
    """
    # Transpose so that isolates become rows and locus tags become columns.
    features = (
        snp_counts.drop(snp_counts.columns[_DROPPED_COLUMNS], axis=1)
        .set_index('locus_tag')
        .transpose()
    )
    # The transposed index is unnamed, so reset_index() exposes it as a column
    # called "index"; rename it back to the key name used for the merge.
    features.reset_index(level=0, inplace=True)
    features = features.rename(columns={"index": "locus_tag"})

    # Right join: every isolate present in the SNP table is kept, even when the
    # antibiotics table has no row for it (its antibiotics cells stay empty).
    merged = pd.merge(
        antibiotics, features,
        left_on='isolate', right_on='locus_tag', how='right',
    )
    return merged.drop('isolate', axis=1)


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Full argument vector including the program name, i.e.
            ``[prog, <vcf_snp_count.txt>, <antibiotics table>, <outfile>]``.
            Defaults to ``sys.argv``.

    Raises:
        SystemExit: with status 1 when the argument count is wrong, so that a
            calling shell or pipeline can detect the misuse.
    """
    if argv is None:
        argv = sys.argv

    # Exactly three user arguments are required; otherwise show the usage.
    if len(argv) != 4:
        print(_USAGE, file=sys.stderr)
        sys.exit(1)

    snp_path, antibiotics_path, out_path = argv[1:4]

    # Both inputs are tab-separated with a header row; every cell is read as a
    # string so that values are written back out unchanged.
    snp_counts = pd.read_csv(snp_path, sep='\t', dtype=str, header=0)
    antibiotics = pd.read_csv(antibiotics_path, sep='\t', dtype=str, header=0)

    # If the antibiotics table already has the R, I, S classification, the
    # user can uncomment the following line to recode it.
    # antibiotics = antibiotics.fillna(0).replace('0','NA').replace(['R','I','S'],[1,0,-1])

    table = build_feature2target(snp_counts, antibiotics)
    table.to_csv(out_path, index=False, sep='\t')


if __name__ == "__main__":
    main()