Skip to content

Commit

Permalink
Merge pull request #74 from broadinstitute/mtegtmey-patch-2
Browse files Browse the repository at this point in the history
Delete presentations directory
  • Loading branch information
shntnu authored Jan 24, 2023
2 parents c7f8de1 + 34d54c6 commit 6c3e2cc
Show file tree
Hide file tree
Showing 311 changed files with 4,194 additions and 648 deletions.
101 changes: 101 additions & 0 deletions 01_MakePhenoCov.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
library(tidyr, lib="/apps/source/R/3.6.3/lib64/R/library/")
library(fansi, lib="/apps/source/R/3.6.3/lib64/R/library/")
library(dplyr, lib="/apps/source/R/3.6.3/lib64/R/library/")

###module load R/3.6.3
###inputs are:
###intcol
###isolate

args <- commandArgs(trailingOnly=TRUE)

id_map <- read.csv("../SecondRound_JatinFeatures/wgs.map.ModifiedSamira.csv")

features <- readRDS(paste("/data/srlab/jatin/cmqtl/gwas_data/final/donor_level/",args[1],".final.rds", sep=""))

Covs <- c("FID",
"IID",
"Metadata_Plate",
"Metadata_Sex")

qCovs <- c("FID",
"IID",
"PC1",
"PC2",
"PC3",
"PC4",
"Cells_Neighbors_NumberOfNeighbors_Adjacent",
"Metadata_onEdge")

ExcludeColumns <- c("Metadata_line_ID",
"Metadata_ID",
"Metadata_Plate",
"Metadata_Clinical_Diagnosis",
"Metadata_Age",
"Metadata_Sex",
"Metadata_iPSC_Origin",
"Metadata_Doubling_Hour",
"Metadata_Disease_Status" ,
"Metadata_Disease_Category",
"Metadata_CellCount_avg",
"PC1",
"PC2",
"PC3",
"PC4",
"PC5",
"Cells_Neighbors_NumberOfNeighbors_Adjacent",
"Metadata_onEdge")

### for isolate i used the following lists instead
#qCovs <- c("FID",
# "IID",
# "PC1",
# "PC2",
# "PC3",
# "PC4",
# "Metadata_onEdge")
#
#ExcludeColumns <- c("Metadata_line_ID",
# "Metadata_ID",
# "Metadata_Plate",
# "Metadata_Clinical_Diagnosis",
# "Metadata_Age",
# "Metadata_Sex",
# "Metadata_iPSC_Origin",
# "Metadata_Doubling_Hour",
# "Metadata_Disease_Status" ,
# "Metadata_Disease_Category",
# "Metadata_CellCount_avg",
# "PC1",
# "PC2",
# "PC3",
# "PC4",
# "PC5",
# "Metadata_onEdge")
#
## order data like fam
data <- merge(id_map, features, by=c("Metadata_line_ID","Metadata_ID"))

fam <- read.table("../SecondRound_JatinFeatures/PlinkFiles/V2F_chr1.nodup.fam", header=F)
fam <- fam[,1:2]
colnames(fam) <- c("FID", "IID")

new_data <- merge(fam, data, by.x="FID", by.y="wgs_id", all.x=T)
new_data <- new_data %>% group_by(Metadata_ID) %>% sample_n(1)
new_data <- new_data[match(fam$IID, new_data$IID),]
new_data <- subset(new_data, !is.na(Metadata_ID))

### make pheno and cov files
new_data <- as.data.frame(new_data)
pheno <- new_data %>% dplyr::select(-c(all_of(ExcludeColumns)))
covar <- new_data %>% dplyr::select(c(all_of(Covs)))
qcovar <- new_data %>% dplyr::select(c(all_of(qCovs)))

write.table(colnames(pheno[,-c(1:2)]), paste("/data/srlab2/sasgari/Projects/V2F/Analysis/GWAS/FourthdeRound_TwoGroupEdgeCov/",args[1],"/",args[1],".phenotypeNames.txt", sep=""), quote=F, row.names=T, col.names=F)
write.table(colnames(covar[,-c(1:2)]), paste("/data/srlab2/sasgari/Projects/V2F/Analysis/GWAS/FourthdeRound_TwoGroupEdgeCov/",args[1],"/",args[1],".CatCovarNames.txt", sep=""), quote=F, row.names=T, col.names=F)
write.table(colnames(qcovar[,-c(1:2)]), paste("/data/srlab2/sasgari/Projects/V2F/Analysis/GWAS/FourthdeRound_TwoGroupEdgeCov/",args[1],"/",args[1],".QuanCovarNames.txt", sep=""), quote=F, row.names=T, col.names=F)

write.table(pheno, paste("/data/srlab2/sasgari/Projects/V2F/Analysis/GWAS/FourthdeRound_TwoGroupEdgeCov/",args[1],"/",args[1],".pheno", sep=""), quote=F, row.names=F, col.names=F)
write.table(qcovar, paste("/data/srlab2/sasgari/Projects/V2F/Analysis/GWAS/FourthdeRound_TwoGroupEdgeCov/",args[1],"/",args[1],".quantitative.cov", sep=""), quote=F, row.names=F, col.names=F)
write.table(covar, paste("/data/srlab2/sasgari/Projects/V2F/Analysis/GWAS/FourthdeRound_TwoGroupEdgeCov/",args[1],"/",args[1],".categorical.cov", sep=""), quote=F, row.names=F, col.names=F)

51 changes: 51 additions & 0 deletions 02_RunAssociation.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/bin/bash

module load plink/1.90b3
module load gcta/1.91.1

export LSB_JOB_REPORT_MAIL=N

function write_Rscript()
{
local outDir=/data/srlab2/sasgari/Projects/V2F/Analysis/GWAS/FourthdeRound_TwoGroupEdgeCov
local chr=$1
local pheno=$2
local cell=$3

cat << EOF | bsub
export LSB_JOB_REPORT_MAIL=N
#BSUB -L /bin/bash
#BSUB -o STDfiles/fastgwa_${chr}_${pheno}_${cell}.out
#BSUB -e STDfiles/fastgwa_${chr}_${pheno}_${cell}.err
#BSUB -J fastgwa
#BSUB -M 8000
#BSUB -q rerunnable
#BSUB -R 'rusage[mem=8000]'
#BSUB -R 'select[hname!=cn001]'
#BSUB -R 'select[hname!=cn002]'
#BSUB -R 'select[hname!=cn003]'
#BSUB -R 'select[hname!=cn004]'
#BSUB -R 'select[hname!=cn005]'
/data/srlab/sasgari/Software/gcta_1.93.2beta/gcta64 --bfile ../SecondRound_JatinFeatures/PlinkFiles/V2F_chr${chr}.nodup.qc \
--pheno ${outDir}/${cell}/${cell}.pheno \
--mpheno ${pheno} \
--covar ${outDir}/${cell}/${cell}.categorical.cov \
--qcovar ${outDir}/${cell}/${cell}.quantitative.cov \
--remove ../SecondRound_JatinFeatures/PlinkFiles/ExcludeFromAssociation.ID \
--maf 0.05 \
--fastGWA-lr \
--threads 10 \
--out ${outDir}/${cell}/${cell}_chr${chr}_pheno${pheno}.LR
gzip -f ${outDir}/${cell}/${cell}_chr${chr}_pheno${pheno}.LR.fastGWA
EOF
}


cat Chr_Pheno_Cell.list | while read chr pheno cell
do
write_Rscript $chr $pheno $cell
done
22 changes: 22 additions & 0 deletions 03_GetLambda_QQ.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
library(qqman)
library(data.table)
library(readr)

args <- commandArgs(trailingOnly = TRUE)

raw.files <- Sys.glob(file.path(paste("/data/srlab2/sasgari/Projects/V2F/Analysis/GWAS/FourthdeRound_TwoGroupEdgeCov/", args[2],"/",args[2],"_chr*_pheno",args[1],".LR.fastGWA.gz", sep="")))

results <- suppressWarnings(rbindlist(lapply(raw.files, function(filename) {suppressMessages(readr::read_tsv(filename, na = c("NaN")))})))

results=na.omit(results)
pvalues=results$P

### qchisq(0.5,1) is the median of a chi-squared distribution with one degree of freedom
z <- as.numeric(results$BETA)/as.numeric(results$SE)
lambda2 = round(median(z^2) / qchisq(0.5,1), 4)

write.table(t(c(args[1], lambda2)), paste("/data/srlab2/sasgari/Projects/V2F/Analysis/GWAS/FourthdeRound_TwoGroupEdgeCov/QQ/", args[2],"/",args[2],"_pheno",args[1],".QQ.txt", sep=""), quote=F, col.names=F, row.names=F)

#png(file=paste("/data/srlab2/sasgari/Projects/V2F/Analysis/GWAS/FourthdeRound_TwoGroupEdgeCov/QQ/", args[2],"/",args[2],"_pheno",args[1],".QQ.png", sep=""), width=400, height=400)
#qqman::qq(pvalues, cex = 0.5, cex.axis = 1,main = paste(args[2], "pheno", args[1], "\n lambda=", lambda2, sep=" "))
#dev.off()
34 changes: 34 additions & 0 deletions 03_Run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
module load R/3.4.0

function write_Rscript()
{
local pheno=$1
local cell=$2
module load R/3.4.0

cat << EOF | bsub
export LSB_JOB_REPORT_MAIL=N
#BSUB -L /bin/bash
#BSUB -o STDfiles/QQ_${pheno}_${cell}.out
#BSUB -e STDfiles/QQ_${pheno}_${cell}.err
#BSUB -J QQ
#BSUB -M 12000
#BSUB -q rerunnable
#BSUB -R 'rusage[mem=12000]'
#BSUB -R 'select[hname!=cn001]'
#BSUB -R 'select[hname!=cn002]'
#BSUB -R 'select[hname!=cn003]'
#BSUB -R 'select[hname!=cn004]'
#BSUB -R 'select[hname!=cn005]'
/apps/lib-osver/R/3.4.0/bin/Rscript 03_GetLambda_QQ.R $pheno $cell
EOF
}

cat Pheno_Cell.list | awk '{if ($1 < 4) print $0}' | while read pheno cell
do
write_Rscript $pheno $cell
done
23 changes: 23 additions & 0 deletions 04_GetdataForManhattanPlot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
library(data.table)
library(readr)
library(tidyverse)

args <- commandArgs(trailingOnly = TRUE)

raw.files <- Sys.glob(file.path(paste("/data/srlab2/sasgari/Projects/V2F/Analysis/GWAS/FourthdeRound_TwoGroupEdgeCov/", args[2],"/",args[2],"_chr*_pheno",args[1],".LR.fastGWA.gz", sep="")))

results1 <- suppressWarnings(rbindlist(lapply(raw.files, function(filename) {suppressMessages(readr::read_tsv(filename, na = c("NaN")))})))
results1=na.omit(results1)
results1$P <- as.numeric(results1$P)

sig <- subset(results1, P < 5e-8)

#results1.1 <- results1 %>% filter(P <= 1e-2)
#results1.2 <- results1 %>% filter(P > 1e-2) %>% sample_frac(0.01)
#
#results <- rbind(results1.1, results1.2)

write.table(sig, paste("/data/srlab2/sasgari/Projects/V2F/Analysis/GWAS/FourthdeRound_TwoGroupEdgeCov/", args[2],"/",args[2],"_pheno",args[1],".lowP4manhattan.tmp", sep=""), quote=F, col.names=T, row.names=F, sep="\t")

#write.table(results, paste("/data/srlab2/sasgari/Projects/V2F/Analysis/GWAS/FourthdeRound_TwoGroupEdgeCov/", args[2],"/",args[2],"_pheno",args[1],".DownSampled.tmp", sep=""), quote=F, col.names=T, row.names=F, sep="\t")

37 changes: 37 additions & 0 deletions 04_Run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/bash
module load R/3.4.0

function write_Rscript()
{
local pheno=$1
local cell=$2
module load R/3.4.0

cat << EOF | bsub
export LSB_JOB_REPORT_MAIL=N
#BSUB -L /bin/bash
#BSUB -o STDfiles/ForManh_${pheno}_${cell}.out
#BSUB -e STDfiles/ForManh_${pheno}_${cell}.err
#BSUB -J DownSamp
#BSUB -M 8000
#BSUB -q rerunnable
#BSUB -R 'rusage[mem=6000]'
#BSUB -R 'select[hname!=cn001]'
#BSUB -R 'select[hname!=cn002]'
#BSUB -R 'select[hname!=cn003]'
#BSUB -R 'select[hname!=cn004]'
#BSUB -R 'select[hname!=cn005]'
/apps/lib-osver/R/3.4.0/bin/Rscript 04_GetdataForManhattanPlot.R $pheno $cell
#gzip -f /data/srlab2/sasgari/Projects/V2F/Analysis/GWAS/FourthdeRound_TwoGroupEdgeCov/${cell}/${cell}_pheno${pheno}.DownSampled.tmp
EOF
}

cat Pheno_Cell.list | while read pheno cell
do
write_Rscript $pheno $cell
done

28 changes: 28 additions & 0 deletions 05_ManhattanPlot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
library(qqman)
library(tidyverse)
library(data.table)
library(readr)

args <- commandArgs(trailingOnly = TRUE)

base.files <- Sys.glob(file.path(paste("/data/srlab2/sasgari/Projects/V2F/Analysis/GWAS/FourthdeRound_TwoGroupEdgeCov/", args[1],"/",args[1],"*.DownSampled.tmp.gz", sep="")))

results1 <- suppressWarnings(rbindlist(lapply(base.files, function(filename) {suppressMessages(readr::read_tsv(filename, na = c("NaN")))})))
results1=na.omit(results1)

results1.1 <- results1 %>% filter(P <= 1e-3)
results1.2 <- results1 %>% filter(P > 1e-3) %>% sample_frac(0.05)

results <- rbind(results1.1, results1.2)

##fdr <- fread(paste("gzip -dc FDRsigPvalue_", args[1], ".txt.gz", sep=""))
##x <- nrow(fdr)
##if(x == 0){
##sug <- -log10(5e-8/908)
##} else {
##sug <- -log10(max(fdr$V1))
##}

png(file=paste(args[1], "_AllPheno_downSampled.png", sep=""), width=900, height=400)
manhattan(results, chr="CHR", bp="POS", snp="SNP", p="P", suggestiveline = -log10(5e-8), genomewideline = -log10(5e-8/246) ,col=c("coral","blueviolet"), ylim=c(0, -log10(min(results$P))+1))
dev.off()
27 changes: 27 additions & 0 deletions 06_PapersTable.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
library(readr)
library(stringr)
library(dplyr)
library(data.table)

pheno <- read.table("/data/srlab2/sasgari/Projects/V2F/Analysis/GWAS/FourthdeRound_TwoGroupEdgeCov/isolate/isolate.phenotypeNames.txt", header=F)
colnames(pheno) <- c("Pheno", "Feature")
pheno$Pheno <- paste0("pheno", pheno$Pheno)

raw.files <- Sys.glob(file.path(
"/data/srlab2/sasgari/Projects/V2F/Analysis/GWAS/FourthdeRound_TwoGroupEdgeCov/*/*_pheno*.lowP4manhattan.tmp"
))

data <- suppressWarnings(rbindlist(lapply(raw.files, function(filename) {
suppressMessages(readr::read_tsv(filename, col_names=FALSE))}), idcol = "File"))
data[, File := factor(File, labels = basename(raw.files))]
data <- data %>% filter(X1 != "CHR")
data$File <- str_remove(data$File, ".lowP4manhattan.tmp")

colnames(data) <- c("File", "CHR", "SNP", "POS", "A1", "A2", "N", "AF1", "BETA","SE","P")
data$Cell <- str_split_fixed(data$File, "_", 2)[,1]
data$Pheno <- str_split_fixed(data$File, "_", 2)[,2]

data <- merge(data, pheno, by="Pheno")
data$P <- as.numeric(data$P)
data <- data %>% filter(P <= 4.1e-8)
write.table(data, "SuggestiveGWAS_results.txt", quote=F, col.names=T, row.names=F, sep="\t")
Loading

0 comments on commit 6c3e2cc

Please sign in to comment.