-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathGM_to_NA.rb
executable file
·45 lines (35 loc) · 1.24 KB
/
GM_to_NA.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/ruby
# USAGE: for i in results/*.csv;do (echo $i;./GM_to_NA.rb $i);done
# Creates, for every experiment, a list that maps GM ids (GEO sample ids) to NA ids (Coriell ids).
require 'rubygems'
require 'fastercsv'
# Text that marks a first-row cell as describing a technical replicate.
GM_TECHNICAL_REPLICATE_MARKER = "Technical replicate"

# Index of the first column of the first row that holds a sample label.
GM_FIRST_SAMPLE_COLUMN = 6

# Returns the id part of a GM sample label: the text that follows the first
# "GM" and precedes +terminator+ (ex. "GM06994_rep1" with "_rep" -> "06994").
# Returns nil when the label holds no usable "GM<id>" part, so that callers
# can skip it instead of crashing on nil.
def extract_gm_id(sample, terminator)
  remainder = sample.split("GM")[1]
  return nil if remainder.nil?
  remainder.split(terminator)[0]
end

# Records in +ids+ (as keys mapped to nil) the id of every sample label found
# in +row+ from column GM_FIRST_SAMPLE_COLUMN onwards, and returns +ids+.
#
# When any cell of the row mentions a technical replicate, the cells that
# mention it are skipped and the ids of the others end at the first ",";
# otherwise ids end at "_rep".
def collect_gm_ids(row, ids)
  # Empty unquoted CSV fields are parsed as nil, hence the nil guards.
  has_technical = row.any? { |cell| cell && cell.include?(GM_TECHNICAL_REPLICATE_MARKER) }
  terminator = has_technical ? "," : "_rep"

  # A row with fewer cells than GM_FIRST_SAMPLE_COLUMN slices to nil.
  (row[GM_FIRST_SAMPLE_COLUMN..-1] || []).each do |sample|
    next if sample.nil? || sample.empty?
    next if sample.include?(GM_TECHNICAL_REPLICATE_MARKER)

    id = extract_gm_id(sample, terminator)
    ids[id] = nil unless id.nil?
  end
  ids
end

infile = ARGV[0]
abort "USAGE: ./GM_to_NA.rb results/<experiment>.csv" if infile.nil?

# Experiment name: the file's base name up to its first dot. Using the base
# name keeps this independent of how many directories precede the file.
filename = File.basename(infile).split(".")[0]
# gm_to_na_ofile = "results/Hapmap/GM2NA_list_#{filename}.csv"
gm_to_na_ofile = "results/eqtl_results_sets/CV_plots/NAs_for_mixed_genes_in_#{filename}.csv"

# isolate the ids (ex. 06994) from the GM ids (ex. GM06994_rep1) of all experiments (GSE files)
# The hash is used as a set: each distinct id is a key whose value is nil.
ids = Hash.new { |h, k| h[k] = [] }
FasterCSV.foreach(infile) do |row|
  collect_gm_ids(row, ids)
  break # only the first row is inspected (presumably the header holding the sample labels)
end

# export a csv file with a list of all the ids of each experiment
gm_to_na_output = File.open(gm_to_na_ofile, "w")
begin
  ids.each_key { |id| gm_to_na_output.puts id }
ensure
  # Close the handle even if writing fails part-way through.
  gm_to_na_output.close
end
# Look up the ids written to gm_to_na_output in each individual HapMap sample file (CEU, JPT, YRI, CHB); the last column of those files contains the Coriell id (ex. NA06994).