OPERA running#
Goal#
After simulating the phenotypes, we will summarize them into summary statistics format, making them compatible with OPERA, and compare with ColocBoost targeted version.
Input#
Simulated individual level data.
Choose non-overlapping regions from 1,287 regions used in other simulations#
library(tidyverse)
select_gene = read_tsv("/home/hs3393/cloud_colocalization/simulation_data/selected_genes_genotype/selected_gene_tb.tsv")
select_gene = select_gene%>% mutate(index = str_replace(TADB_index, "TADB_", "")) %>%mutate(index = as.numeric(index)) %>% arrange(index)
chrom_str = c()
start_str = c()
end_str = c()
gene_str = c()
cnt = 1
for(i in 1:nrow(select_gene)){
chrom = select_gene[["#chr"]][i]
start = select_gene[["gene_cis_start"]][i]
end = select_gene[["gene_cis_end"]][i]
gene = select_gene[["gene_id"]][i]
if(i == 1){
chrom_str = c(chrom_str, chrom)
start_str = c(start_str, start)
end_str = c(end_str, end)
gene_str = c(gene_str, gene)
cnt = cnt + 1
}else{
if(chrom != chrom_str[cnt - 1]){
chrom_str = c(chrom_str, chrom)
start_str = c(start_str, start)
end_str = c(end_str, end)
gene_str = c(gene_str, gene)
cnt = cnt + 1
}else{
if(start > end_str[cnt -1]){
chrom_str = c(chrom_str, chrom)
start_str = c(start_str, start)
end_str = c(end_str, end)
gene_str = c(gene_str, gene)
cnt = cnt + 1
}
}
}
}
Based on causal variant distribution, mix the result by proportion#
Save the selected files in another folder.
## Obtained from true colocalization configuration
trait_2 = c(2177, 233, 26)
trait_2 = trait_2 / sum(trait_2)
trait_5 = c(1831, 258, 41, 9 )
trait_5 = trait_5 / sum(trait_5)
trait_10 = c(3175, 805, 149, 45, 10)
trait_10 = trait_10 / sum(trait_10)
# mix the
set.seed(100)
total_indices <- 495
proportions <- trait_2
# Calculate the exact number of indices for each group
group_sizes <- floor(total_indices * proportions)
# Adjust for any rounding errors
group_sizes[1] <- group_sizes[1] + (total_indices - sum(group_sizes))
# Create a vector of group assignments
groups <- rep(1:3, times = group_sizes)
groups <- sample(groups)
# Create a list to store the indices for each group
partitioned_indices <- lapply(1:3, function(i) which(groups == i) - 1)
for(h2g in c(0.02, 0.03, 0.04, 0.05)){
path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/h2g_", h2g, "_mix/")
dir.create(path, showWarnings = F, recursive = T)
for(i in partitioned_indices[[1]]){
file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/causal_1_h2g_",
h2g, "/real_simulation_2opera/sample_",
i, "_real_simulation_1_ncausal_2_trait.rds" )
file.copy(from = file_path, to = path, overwrite = TRUE)
}
for(i in partitioned_indices[[2]]){
file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/causal_2_h2g_",
h2g, "/real_simulation_2opera/sample_",
i, "_real_simulation_2_ncausal_2_trait.rds" )
file.copy(from = file_path, to = path, overwrite = TRUE)
}
for(i in partitioned_indices[[3]]){
file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/causal_3_h2g_",
h2g, "/real_simulation_2opera/sample_",
i, "_real_simulation_3_ncausal_2_trait.rds" )
file.copy(from = file_path, to = path, overwrite = TRUE)
}
}
set.seed(100)
total_indices <- 495
proportions <- trait_5
# Calculate the exact number of indices for each group
group_sizes <- floor(total_indices * proportions)
# Adjust for any rounding errors
group_sizes[1] <- group_sizes[1] + (total_indices - sum(group_sizes))
# Create a vector of group assignments
groups <- rep(1:4, times = group_sizes)
groups <- sample(groups)
# Create a list to store the indices for each group
partitioned_indices <- lapply(1:4, function(i) which(groups == i) - 1)
for(h2g in c(0.02, 0.03, 0.04, 0.05)){
path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/h2g_", h2g, "_mix/")
dir.create(path, showWarnings = F, recursive = T)
for(i in partitioned_indices[[1]]){
file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/causal_1_h2g_",
h2g, "/real_simulation_5opera/sample_",
i, "_real_simulation_1_ncausal_5_trait.rds" )
file.copy(from = file_path, to = path, overwrite = TRUE)
}
for(i in partitioned_indices[[2]]){
file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/causal_2_h2g_",
h2g, "/real_simulation_5opera/sample_",
i, "_real_simulation_2_ncausal_5_trait.rds" )
file.copy(from = file_path, to = path, overwrite = TRUE)
}
for(i in partitioned_indices[[3]]){
file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/causal_3_h2g_",
h2g, "/real_simulation_5opera/sample_",
i, "_real_simulation_3_ncausal_5_trait.rds" )
file.copy(from = file_path, to = path, overwrite = TRUE)
}
for(i in partitioned_indices[[4]]){
file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/causal_4_h2g_",
h2g, "/real_simulation_5opera/sample_",
i, "_real_simulation_4_ncausal_5_trait.rds" )
file.copy(from = file_path, to = path, overwrite = TRUE)
}
}
set.seed(100)
total_indices <- 495
proportions <- trait_10
# Calculate the exact number of indices for each group
group_sizes <- floor(total_indices * proportions)
# Adjust for any rounding errors
group_sizes[1] <- group_sizes[1] + (total_indices - sum(group_sizes))
# Create a vector of group assignments
groups <- rep(1:5, times = group_sizes)
groups <- sample(groups)
# Create a list to store the indices for each group
partitioned_indices <- lapply(1:5, function(i) which(groups == i) - 1)
for(h2g in c(0.02, 0.03, 0.04, 0.05)){
path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/h2g_", h2g, "_mix/")
dir.create(path, showWarnings = F, recursive = T)
for(i in partitioned_indices[[1]]){
file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/causal_1_h2g_",
h2g, "/real_simulation_10opera/sample_",
i, "_real_simulation_1_ncausal_10_trait.rds" )
file.copy(from = file_path, to = path, overwrite = TRUE)
}
for(i in partitioned_indices[[2]]){
file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/causal_2_h2g_",
h2g, "/real_simulation_10opera/sample_",
i, "_real_simulation_2_ncausal_10_trait.rds" )
file.copy(from = file_path, to = path, overwrite = TRUE)
}
for(i in partitioned_indices[[3]]){
file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/causal_3_h2g_",
h2g, "/real_simulation_10opera/sample_",
i, "_real_simulation_3_ncausal_10_trait.rds" )
file.copy(from = file_path, to = path, overwrite = TRUE)
}
for(i in partitioned_indices[[4]]){
file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/causal_4_h2g_",
h2g, "/real_simulation_10opera/sample_",
i, "_real_simulation_4_ncausal_10_trait.rds" )
file.copy(from = file_path, to = path, overwrite = TRUE)
}
for(i in partitioned_indices[[5]]){
file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/causal_5_h2g_",
h2g, "/real_simulation_10opera/sample_",
i, "_real_simulation_5_ncausal_10_trait.rds" )
file.copy(from = file_path, to = path, overwrite = TRUE)
}
}
Transform individual level simulation into sumstat#
library("MASS")
library("plink2R")
library("dplyr")
library("readr")
library("tidyverse")
# source some functions to read matrix and inpute the missing data
source("~/cloud_colocalization/simulation_code/simulate_linreg.R")
source("~/cloud_colocalization/simulation_code/misc.R")
calculate_sumstat = function(X, Y){
Beta = c()
se = c()
Freq = c()
p = c()
for(mm in 1:ncol(X)){
rr <- susieR::univariate_regression(X[,mm], Y)
Beta[mm] = rr$betahat
se[mm] <- rr$sebetahat
Freq[mm] = sum(X[,mm])/(2*nrow(X))
p[mm] = 2 * (1 - pnorm(abs(rr$betahat / rr$sebetahat)))
}
tb = tibble(
SNP = colnames(X),
Beta = Beta,
se = se,
Freq = Freq, p = p) %>%
separate(SNP, into = c("Chr", "Bp", "A2", "A1"), sep = "[:_]", remove = FALSE) %>%
select(Chr, SNP, Bp, A1, A2, Freq, Beta, se, p) %>%
mutate(Chr = str_replace(Chr, "chr", "")) %>%
mutate(Chr = as.numeric(Chr), Bp = as.numeric(Bp))
return(tb)
}
cnt = 1
ntrait = TRAIT
h2g = H2G
path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/h2g_", h2g, "_mix")
files = list.files(path, full.names = T, pattern = ".rds")
data <- rep(list(NULL), length(files))
for(file in files){
rds = readRDS(file)
X = rds$X
Y = rds$Y
trait = list()
trait[[1]] = calculate_sumstat(X, Y[,1, drop = T]) %>% rename(freq = Freq, b = Beta) %>% mutate(N = 1160) %>% select(SNP, A1, A2, freq, b, se, p, N)
for(i in 2:ncol(Y)){
trait[[i]] = calculate_sumstat(X, Y[,i, drop = T])
}
index = str_extract(file, "(?<=sample_)\\d+")
data = list()
data[["trait"]] = trait
data$index = index
data$gwas_var_num = length(rds$variant[[1]])
dir.create(paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/mix_sumstat/h2g_", h2g, "/real_simulation_", ntrait, "opera"), showWarnings = F, recursive = T)
saveRDS(data, paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/mix_sumstat/h2g_", h2g, "/real_simulation_", ntrait, "opera/index_", index, ".rds"))
cnt = cnt + 1
}
Example output#
result = readRDS("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/mix_sumstat/h2g_0.02/real_simulation_2opera/index_240.rds")
str(result)
head(result$trait[[1]])
List of 3
$ trait :List of 2
..$ :Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 6791 obs. of 8 variables:
.. ..$ SNP : chr [1:6791] "chr12:108093914_C_T" "chr12:108093950_A_G" "chr12:108094333_C_G" "chr12:108094913_A_G" ...
.. ..$ A1 : chr [1:6791] "T" "G" "G" "G" ...
.. ..$ A2 : chr [1:6791] "C" "A" "C" "A" ...
.. ..$ freq: num [1:6791] 0.0924 0.3747 0.3504 0.0928 0.2207 ...
.. ..$ b : num [1:6791] -0.4608 0.1953 -0.0277 -0.4819 -0.443 ...
.. ..$ se : num [1:6791] 0.314 0.181 0.191 0.314 0.212 ...
.. ..$ p : num [1:6791] 0.1425 0.2811 0.885 0.1244 0.0369 ...
.. ..$ N : num [1:6791] 1160 1160 1160 1160 1160 1160 1160 1160 1160 1160 ...
..$ :Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 6791 obs. of 9 variables:
.. ..$ Chr : num [1:6791] 12 12 12 12 12 12 12 12 12 12 ...
.. ..$ SNP : chr [1:6791] "chr12:108093914_C_T" "chr12:108093950_A_G" "chr12:108094333_C_G" "chr12:108094913_A_G" ...
.. ..$ Bp : num [1:6791] 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 ...
.. ..$ A1 : chr [1:6791] "T" "G" "G" "G" ...
.. ..$ A2 : chr [1:6791] "C" "A" "C" "A" ...
.. ..$ Freq: num [1:6791] 0.0924 0.3747 0.3504 0.0928 0.2207 ...
.. ..$ Beta: num [1:6791] -0.1844 -0.1309 0.128 -0.1795 -0.0441 ...
.. ..$ se : num [1:6791] 0.203 0.117 0.123 0.203 0.137 ...
.. ..$ p : num [1:6791] 0.364 0.263 0.299 0.376 0.748 ...
$ index : chr "240"
$ gwas_var_num: int 1
SNP | A1 | A2 | freq | b | se | p | N | |
---|---|---|---|---|---|---|---|---|
<chr> | <chr> | <chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
1 | chr12:108093914_C_T | T | C | 0.09236774 | -0.46078434 | 0.3141903 | 0.14249112 | 1160 |
2 | chr12:108093950_A_G | G | A | 0.37467476 | 0.19533530 | 0.1812171 | 0.28107492 | 1160 |
3 | chr12:108094333_C_G | G | C | 0.35039029 | -0.02765113 | 0.1911775 | 0.88499837 | 1160 |
4 | chr12:108094913_A_G | G | A | 0.09280139 | -0.48190914 | 0.3136359 | 0.12440993 | 1160 |
5 | chr12:108095069_C_T | T | C | 0.22072853 | -0.44301635 | 0.2122775 | 0.03689105 | 1160 |
6 | chr12:108095158_C_T | T | C | 0.25628794 | 0.19442679 | 0.2086272 | 0.35137065 | 1160 |
base_sh="base_script"
# Create the base_script file and write the bash code into it
cat << 'EOF' > ${base_sh}
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 78:00:00
#SBATCH --mem=30000
#SBATCH -J extract
#SBATCH -o /home/hs3393/cloud_colocalization/submission_script/sumstat_generate/log/extract.%j.out
#SBATCH -e /home/hs3393/cloud_colocalization/submission_script/sumstat_generate/log/extract.%j.err
source ~/mamba_activate.sh
module load Singularity
Rscript /home/hs3393/cb_simulation/simulation_data/simulation_opera/opera_code/sumstat_generate/trait_TRAIT_h2g_H2G.R
EOF
base_R="base_R"
for trait in 2 5 10; do
for h2g in 0.02 0.03 0.04 0.05; do
output_R="trait_${trait}_h2g_${h2g}.R"
output_sh="trait_${trait}_h2g_${h2g}.sh"
cat ${base_R}| sed "s|TRAIT|${trait}|g" | sed "s|H2G|${h2g}|g" > ${output_R}
cat ${base_sh}| sed "s|TRAIT|${trait}|g" | sed "s|H2G|${h2g}|g" > ${output_sh}
sbatch ${output_sh}
done
done
Transform sumstat into .esd format and make the .flist summary file#
Also, GWAS trait is saved into .ma format.
library(tidyverse)
cnt = 1
ntrait = TRAIT
h2g = H2G
cwd = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/mix_sumstat/h2g_", h2g, "/real_simulation_", ntrait, "opera/")
dir.create(paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/opera_input_mix/",
"h2g_", h2g, "/real_simulation_", ntrait, "opera/"), showWarnings = F, recursive = T)
output_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/opera_input_mix/",
"h2g_", h2g, "/real_simulation_", ntrait, "opera/")
gwas = list()
path = vector("list", ntrait)
chr = c()
distance = c()
probe_id = c()
cnt = 1
for(file in list.files(cwd, pattern = ".rds")){
file_path = paste0(cwd, file)
rds = readRDS(file_path)
tad_index = as.numeric(sub(".*index_([0-9]+).*", "\\1", file_path))
gwas[[cnt]] = rds$trait[[1]]
distance[cnt] = floor((max(as.numeric(rds$trait[[2]]$Bp)) + min(as.numeric(rds$trait[[2]]$Bp))) / 2)
chr[cnt] = unique(rds$trait[[2]]$Chr)
probe_id[cnt] = paste0("Region_", tad_index)
for(i in 2:ntrait){
path[[i-1]][cnt] = paste0(output_path, "/region_", tad_index, "_trait_", i, ".esd")
rds$trait[[i]] %>% write_tsv(path[[i-1]][cnt])
}
cnt = cnt + 1
}
for(i in 2:ntrait){
tibble(Chr = chr,
ProbeID = paste0(probe_id, "_", i),
GeneticDistance = 0,
ProbeBp = distance,
Gene = paste0(probe_id, "_", i),
Orientation = NA,
PathOfEsd = path[[i -1]]) %>%
write_tsv(paste0(output_path,
"/trait_", i, ".flist"))
}
bind_rows(gwas) %>%
write_tsv(paste0(output_path,
"/trait_1.ma"))
bind_rows(gwas) %>%
select(SNP) %>%
write_tsv(paste0(output_path,
"/remain_snp.tsv"), col_names = FALSE)
Example output#
library(tidyverse)
result = read_delim("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.02/real_simulation_2opera/region_274_trait_2.esd")
head(result)
Rows: 5504 Columns: 9
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (3): SNP, A1, A2
dbl (6): Chr, Bp, Freq, Beta, se, p
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(tidyverse)
result = read_delim("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.02/real_simulation_2opera/trait_2.flist")
head(result)
Rows: 495 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (3): ProbeID, Gene, PathOfEsd
dbl (3): Chr, GeneticDistance, ProbeBp
lgl (1): Orientation
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Chr | ProbeID | GeneticDistance | ProbeBp | Gene | Orientation | PathOfEsd |
---|---|---|---|---|---|---|
<dbl> | <chr> | <dbl> | <dbl> | <chr> | <lgl> | <chr> |
8 | Region_0_2 | 0 | 17497391 | Region_0_2 | NA | /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.02/real_simulation_2opera//region_0_trait_2.esd |
7 | Region_1_2 | 0 | 107320728 | Region_1_2 | NA | /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.02/real_simulation_2opera//region_1_trait_2.esd |
5 | Region_10_2 | 0 | 113896770 | Region_10_2 | NA | /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.02/real_simulation_2opera//region_10_trait_2.esd |
20 | Region_100_2 | 0 | 61926166 | Region_100_2 | NA | /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.02/real_simulation_2opera//region_100_trait_2.esd |
20 | Region_101_2 | 0 | 35475951 | Region_101_2 | NA | /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.02/real_simulation_2opera//region_101_trait_2.esd |
1 | Region_102_2 | 0 | 147899365 | Region_102_2 | NA | /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.02/real_simulation_2opera//region_102_trait_2.esd |
base_sh="base_script"
# Create the base_script file and write the bash code into it
cat << 'EOF' > ${base_sh}
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 78:00:00
#SBATCH --mem=50000
#SBATCH -J extract
#SBATCH -o /home/hs3393/cloud_colocalization/submission_script/opera_sumstat_extract/log/config.%j.out
#SBATCH -e /home/hs3393/cloud_colocalization/submission_script/opera_sumstat_extract/log/config.%j.err
source ~/mamba_activate.sh
module load Singularity
Rscript /home/hs3393/cb_simulation/simulation_data/simulation_opera/opera_code/opera_sumstat_extract/trait_TRAIT_h2g_H2G.R
EOF
base_R="base_R"
for trait in 2 5 10; do
for h2g in 0.02 0.03 0.04 0.05; do
output_R="trait_${trait}_h2g_${h2g}.R"
output_sh="trait_${trait}_h2g_${h2g}.sh"
cat ${base_R}| sed "s|TRAIT|${trait}|g" | sed "s|H2G|${h2g}|g" > ${output_R}
cat ${base_sh}| sed "s|TRAIT|${trait}|g" | sed "s|H2G|${h2g}|g" > ${output_sh}
sbatch ${output_sh}
done
done
Use SMR to convert .esd files to .bsed files as OPERA input#
job_name="besd"
WD="/home/hs3393/cb_simulation/simulation_data/simulation_opera/opera_code/make_besd"
mkdir -p ${WD}/code
mkdir -p ${WD}/log
traits=("2" "5" "10")
h2gs=("0.02" "0.03" "0.04" "0.05")
cd ${WD}/code
cat << 'EOF' > base_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J JOBNAME
#SBATCH -o PWD/log/JOBNAME."%j".out
#SBATCH -e PWD/log/JOBNAME."%j".err
source ~/mamba_activate.sh
module load Singularity
cd /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_TRAITopera/opera_input_mix/h2g_H2G/real_simulation_TRAITopera
for trait_num in {2..10}; do
~/smr_heidi/SMR/smr-1.3.1-linux-x86_64/smr-1.3.1 \
--eqtl-flist ./trait_${trait_num}.flist \
--make-besd \
--cis-wind 3000 \ # extended since our cis region can be larger than 2Mb
--make-besd-dense \ # keep all SNP info
--out ./trait_${trait_num}
done
EOF
base_script="base_script"
for trait in "${traits[@]}"; do
for h2g in "${h2gs[@]}"; do
output_script="script_trait_${trait}_h2g_${h2g}.sh"
cat ${base_script} | sed "s|TRAIT|${trait}|g" | sed "s|H2G|${h2g}|g" | sed "s|PWD|${WD}|g" | sed "s|JOBNAME|${job_name}|g" > ${output_script}
sb ${output_script}
# cat script_variant_${variant}.sh
done
done
Exposure (QTL) meta file summary#
# Create a two-line table with strings
library(tidyverse)
for(TRAIT in c(2,5,10)){
for(H2G in c(0.02, 0.03, 0.04, 0.05)){
path = c()
ntrait = TRAIT
h2g = H2G
cnt = 1
output_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/opera_input_mix/h2g_", h2g, "/real_simulation_", ntrait, "opera/")
for(i in c(2:ntrait)){
path[cnt] = paste0(output_path, "trait_", i)
cnt = cnt + 1
}
data <- data.frame(Path = path)
# Print the data
print(data)
# Save the data to a CSV file
write_csv(data, paste0(output_path, "/qtl_list"), col_names = FALSE)
}
}
Example output#
cat /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.02/real_simulation_5opera/qtl_list
/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.02/real_simulation_5opera/trait_2
/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.02/real_simulation_5opera/trait_3
/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.02/real_simulation_5opera/trait_4
/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.02/real_simulation_5opera/trait_5
Subset the genome-wide SNP PLINK binary file#
Only include the variants in our selected non-overlapping regions for stage 1 prior estimation.
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 36:00:00
#SBATCH --mem=60000
#SBATCH -J LDextract
#SBATCH -o LDextract.%j.out
#SBATCH -e LDextract.%j.err
source ~/mamba_activate.sh
module load Singularity
cd /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_TRAITopera/opera_input_mix/h2g_H2G/real_simulation_TRAITopera
plink2 --bfile /mnt/vast/hpc/csg/molecular_phenotype_calling/genotype_arch/ROSMAP_NIA_WGS.leftnorm.filtered --extract remain_snp.tsv --make-bed --out remain_snp_LD
base_sh="base_sh"
for trait in 2 5 10; do
for h2g in 0.02 0.03 0.04 0.05; do
output_sh="trait_${trait}_h2g_${h2g}.sh"
cat ${base_sh}| sed "s|TRAIT|${trait}|g" | sed "s|H2G|${h2g}|g" > ${output_sh}
sbatch ${output_sh}
done
done
Save genotype (LD) metadata in remain_snp_LD file#
# LD
# Create a two-line table with strings
library(tidyverse)
for(ntrait in c(2,5,10)){
for(h2g in c(0.02, 0.03, 0.04, 0.05)){
cnt = 1
output_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/opera_input_mix/h2g_", h2g, "/real_simulation_", ntrait, "opera/")
data <- paste0(output_path, "/remain_snp_LD")
# Print the data
print(data)
# Save the data to a CSV file
write_lines(data, paste0(output_path, "/genotype_data"))
}
}
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.02/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.03/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.04/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.05/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.02/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.03/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.04/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.05/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input_mix/h2g_0.02/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input_mix/h2g_0.03/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input_mix/h2g_0.04/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input_mix/h2g_0.05/real_simulation_10opera//remain_snp_LD"
Stage 1 OPERA (pi estimation)#
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 36:00:00
#SBATCH --mem=60000
#SBATCH -J opera1
#SBATCH -o opera1.%j.out
#SBATCH -e opera1.%j.err
source /home/hs3393/mamba_activate.sh
module load Singularity
cd /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_TRAITopera/opera_input_mix/h2g_H2G/real_simulation_TRAITopera
/home/hs3393/opera_Linux/opera_Linux --besd-flist qtl_list --gwas-summary ./trait_1.ma \
--mbfile ./genotype_data --pi-wind 150 \
--estimate-pi --out pi_estimation
base_sh="base_sh"
for trait in 2 5 10; do
for h2g in 0.02 0.03 0.04 0.05; do
output_sh="trait_${trait}_h2g_${h2g}.sh"
cat ${base_sh}| sed "s|TRAIT|${trait}|g" | sed "s|H2G|${h2g}|g" > ${output_sh}
sbatch ${output_sh}
done
done
Example output#
cat /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.05/real_simulation_5opera/pi_estimation.pi
Posteriors Pi1(0:0:0:0) Pi2(0:0:0:1) Pi3(0:0:1:0) Pi4(0:0:1:1) Pi5(0:1:0:0) Pi6(0:1:0:1) Pi7(0:1:1:0) Pi8(0:1:1:1) Pi9(1:0:0:0) Pi10(1:0:0:1) Pi11(1:0:1:0) Pi12(1:0:1:1) Pi13(1:1:0:0) Pi14(1:1:0:1) Pi15(1:1:1:0) Pi16(1:1:1:1)
Mean 0.275201 0.00228085 0.00233336 0.00224573 0.00231277 0.00220141 0.00223646 0.00227284 0.00246007 0.00209981 0.00291004 0.00263845 0.00362954 0.00242097 0.00327664 0.68948
SD 0.0643589 0.00712061 0.00685669 0.00697502 0.00686388 0.00684095 0.00673787 0.00699348 0.00695986 0.00649404 0.00799288 0.00752632 0.00940491 0.00681977 0.00880387 0.0667714
Stage 2 OPERA#
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 36:00:00
#SBATCH --mem=80000
#SBATCH -J opera2
#SBATCH -o opera2.%j.out
#SBATCH -e opera2.%j.err
source /home/hs3393/mamba_activate.sh
module load Singularity
cd /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_TRAITopera/opera_input_mix/h2g_H2G/real_simulation_TRAITopera
/home/hs3393/opera_Linux/opera_Linux --besd-flist qtl_list --gwas-summary ./trait_1.ma \
--bfile ./remain_snp_LD \
--prior-pi-file ./pi_estimation.pi \
--prior-var-file ./pi_estimation.var \
--out ./opera_res --outcome-wind 1500
base_sh="base_sh"
for trait in 2 5 10; do
for h2g in 0.02 0.03 0.04 0.05; do
output_sh="trait_${trait}_h2g_${h2g}.sh"
cat ${base_sh}| sed "s|TRAIT|${trait}|g" | sed "s|H2G|${h2g}|g" > ${output_sh}
sbatch ${output_sh}
done
done
Example output#
head /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.05/real_simulation_5opera/opera_res_4_expos_assoc.ppa
Chr Expo1_ID Expo1_bp Expo2_ID Expo2_bp Expo3_ID Expo3_bp Expo4_ID Expo4_bp PPA(1,2,3,4) p_HEIDI(1,2,3,4)
10 Region_241_2 37025329 Region_241_3 37025329 Region_241_4 37025329 Region_241_5 37025329 0.999967 5.250539e-01
11 Region_107_2 11927391 Region_107_3 11927391 Region_107_4 11927391 Region_107_5 11927391 0.999991 8.772566e-01
11 Region_108_2 18057498 Region_108_3 18057498 Region_108_4 18057498 Region_108_5 18057498 1 6.443910e-01
13 Region_43_2 54814185 Region_43_3 54814185 Region_43_4 54814185 Region_43_5 54814185 0.999916 5.845443e-01
13 Region_449_2 30572631 Region_449_3 30572631 Region_449_4 30572631 Region_449_5 30572631 0.999933 3.243127e-01
18 Region_41_2 61335205 Region_41_3 61335205 Region_41_4 61335205 Region_41_5 61335205 0.999989 8.673183e-01
1 Region_124_2 84179558 Region_124_3 84179558 Region_124_4 84179558 Region_124_5 84179558 0.999975 4.870884e-01
2 Region_73_2 59140327 Region_73_3 59140327 Region_73_4 59140327 Region_73_5 59140327 0.999987 9.353629e-01
3 Region_246_2 108165863 Region_246_3 108165863 Region_246_4 108165863 Region_246_5 108165863 0.999999 3.673531e-01
Previous version: not mix causal variant by configuration#
For each causal variant number get a separate result.
Convert individual level simulation results to sumstat#
library("MASS")
library("plink2R")
library("dplyr")
library("readr")
library("tidyverse")
# source some functions to read matrix and inpute the missing data
source("~/cloud_colocalization/simulation_code/simulate_linreg.R")
source("~/cloud_colocalization/simulation_code/misc.R")
calculate_sumstat = function(X, Y){
Beta = c()
se = c()
Freq = c()
p = c()
for(mm in 1:ncol(X)){
rr <- susieR::univariate_regression(X[,mm], Y)
Beta[mm] = rr$betahat
se[mm] <- rr$sebetahat
Freq[mm] = sum(X[,mm])/(2*nrow(X))
p[mm] = 2 * (1 - pnorm(abs(rr$betahat / rr$sebetahat)))
}
tb = tibble(
SNP = colnames(X),
Beta = Beta,
se = se,
Freq = Freq, p = p) %>%
separate(SNP, into = c("Chr", "Bp", "A2", "A1"), sep = "[:_]", remove = FALSE) %>%
select(Chr, SNP, Bp, A1, A2, Freq, Beta, se, p) %>%
mutate(Chr = str_replace(Chr, "chr", "")) %>%
mutate(Chr = as.numeric(Chr), Bp = as.numeric(Bp))
return(tb)
}
cnt = 1
ntrait = TRAIT
ncausal = CAUSAL
h2g = H2G
path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/causal_", ncausal, "_h2g_", h2g, "/real_simulation_", ntrait, "opera")
files = list.files(path, full.names = T, pattern = ".rds")
data <- rep(list(NULL), length(files))
for(file in files){
rds = readRDS(file)
X = rds$X
Y = rds$Y
trait = list()
trait[[1]] = calculate_sumstat(X, Y[,1, drop = T]) %>% rename(freq = Freq, b = Beta) %>% mutate(N = 1160) %>% select(SNP, A1, A2, freq, b, se, p, N)
for(i in 2:ncol(Y)){
trait[[i]] = calculate_sumstat(X, Y[,i, drop = T])
}
index = str_extract(file, "(?<=sample_)\\d+")
data = list()
data[["trait"]] = trait
data$index = index
data$gwas_var_num = length(rds$variant[[1]])
dir.create(paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/sumstat/causal_", ncausal, "_h2g_", h2g, "/real_simulation_", ntrait, "opera"), showWarnings = F, recursive = T)
saveRDS(data, paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/sumstat/causal_", ncausal, "_h2g_", h2g, "/real_simulation_", ntrait, "opera/index_", index, "_ncausal_", ncausal,".rds"))
cnt = cnt + 1
}
Submission script#
base_sh="base_script"
# Create the base_script file and write the bash code into it
cat << 'EOF' > ${base_sh}
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 78:00:00
#SBATCH --mem=30000
#SBATCH -J extract
#SBATCH -o /home/hs3393/cloud_colocalization/submission_script/sumstat_generate/log/extract.%j.out
#SBATCH -e /home/hs3393/cloud_colocalization/submission_script/sumstat_generate/log/extract.%j.err
source ~/mamba_activate.sh
module load Singularity
Rscript /home/hs3393/cb_simulation/simulation_data/simulation_opera/opera_code/sumstat_generate/trait_TRAIT_causal_CAUSAL_h2g_H2G.R
EOF
base_R="base_R"
for trait in 2 5 10; do
for causal in 1 2 3 4 5; do
for h2g in 0.02 0.03 0.04 0.05; do
output_R="trait_${trait}_causal_${causal}_h2g_${h2g}.R"
output_sh="trait_${trait}_causal_${causal}_h2g_${h2g}.sh"
cat ${base_R}| sed "s|TRAIT|${trait}|g" | sed "s|CAUSAL|${causal}|g" | sed "s|H2G|${h2g}|g" > ${output_R}
cat ${base_sh}| sed "s|TRAIT|${trait}|g" | sed "s|CAUSAL|${causal}|g" | sed "s|H2G|${h2g}|g" > ${output_sh}
sbatch ${output_sh}
done
done
done
library(tidyverse)
cnt = 1
ntrait = TRAIT
ncausal = CAUSAL
h2g = H2G
cwd = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/sumstat/causal_",
ncausal, "_h2g_", h2g, "/real_simulation_", ntrait, "opera/")
dir.create(paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/opera_input/causal_",
ncausal, "_h2g_", h2g, "/real_simulation_", ntrait, "opera"), showWarnings = F, recursive = T)
output_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/opera_input/causal_",
ncausal, "_h2g_", h2g, "/real_simulation_", ntrait, "opera/")
gwas = list()
path = vector("list", ntrait)
chr = c()
distance = c()
probe_id = c()
cnt = 1
for(file in list.files(cwd, pattern = paste0(ncausal, ".rds"))){
file_path = paste0(cwd, file)
rds = readRDS(file_path)
tad_index = as.numeric(sub(".*index_([0-9]+).*", "\\1", file_path))
gwas[[cnt]] = rds$trait[[1]]
distance[cnt] = floor((max(as.numeric(rds$trait[[2]]$Bp)) + min(as.numeric(rds$trait[[2]]$Bp))) / 2)
chr[cnt] = unique(rds$trait[[2]]$Chr)
probe_id[cnt] = paste0("Region_", tad_index)
for(i in 2:ntrait){
path[[i-1]][cnt] = paste0(output_path, "/region_", tad_index, "_trait_", i, ".esd")
rds$trait[[i]] %>% write_tsv(path[[i-1]][cnt])
}
cnt = cnt + 1
}
for(i in 2:ntrait){
tibble(Chr = chr,
ProbeID = paste0(probe_id, "_", i),
GeneticDistance = 0,
ProbeBp = distance,
Gene = paste0(probe_id, "_", i),
Orientation = NA,
PathOfEsd = path[[i -1]]) %>%
write_tsv(paste0(output_path,
"/trait_", i, ".flist"))
}
bind_rows(gwas) %>%
write_tsv(paste0(output_path,
"/trait_1.ma"))
bind_rows(gwas) %>%
select(SNP) %>%
write_tsv(paste0(output_path,
"/remain_snp.tsv"), col_names = FALSE)
Submission script#
base_sh="base_script"
# Create the base_script file and write the bash code into it
cat << 'EOF' > ${base_sh}
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 78:00:00
#SBATCH --mem=50000
#SBATCH -J extract
#SBATCH -o /home/hs3393/cloud_colocalization/submission_script/opera_sumstat_extract/log/config.%j.out
#SBATCH -e /home/hs3393/cloud_colocalization/submission_script/opera_sumstat_extract/log/config.%j.err
source ~/mamba_activate.sh
module load Singularity
Rscript /home/hs3393/cb_simulation/simulation_data/simulation_opera/opera_code/opera_sumstat_extract/trait_TRAIT_causal_CAUSAL_h2g_H2G.R
EOF
base_R="base_R"
for trait in 2 5 10; do
for causal in 1 2 3 4 5; do
for h2g in 0.02 0.03 0.04 0.05; do
output_R="trait_${trait}_causal_${causal}_h2g_${h2g}.R"
output_sh="trait_${trait}_causal_${causal}_h2g_${h2g}.sh"
cat ${base_R}| sed "s|TRAIT|${trait}|g" | sed "s|CAUSAL|${causal}|g" | sed "s|H2G|${h2g}|g" > ${output_R}
cat ${base_sh}| sed "s|TRAIT|${trait}|g" | sed "s|CAUSAL|${causal}|g" | sed "s|H2G|${h2g}|g" > ${output_sh}
sbatch ${output_sh}
done
done
done
Use SMR to convert .esd files to .bsed files as OPERA input#
job_name="besd"
WD="/home/hs3393/cb_simulation/simulation_data/simulation_opera/opera_code/make_besd"
mkdir -p ${WD}/code
mkdir -p ${WD}/log
traits=("2" "5" "10")
causals=("1" "2" "3" "4" "5")
h2gs=("0.02" "0.03" "0.04" "0.05")
cd ${WD}/code
cat << 'EOF' > base_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J JOBNAME
#SBATCH -o PWD/log/JOBNAME."%j".out
#SBATCH -e PWD/log/JOBNAME."%j".err
source ~/mamba_activate.sh
module load Singularity
cd /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_TRAITopera/opera_input/causal_CAUSAL_h2g_H2G/real_simulation_TRAITopera
for trait_num in {2..10}; do
~/smr_heidi/SMR/smr-1.3.1-linux-x86_64/smr-1.3.1 \
--eqtl-flist ./trait_${trait_num}.flist \
--make-besd \
--cis-wind 3000 \ # enlarged since our cis region is larger than 2M bases
--make-besd-dense \ # keep all SNP info
--out ./trait_${trait_num}
done
EOF
base_script="base_script"
for trait in "${traits[@]}"; do
for causal in "${causals[@]}"; do
for h2g in "${h2gs[@]}"; do
output_script="script_n_causal_${causal}_trait_${trait}_h2g_${h2g}.sh"
cat ${base_script} | sed "s|TRAIT|${trait}|g" | sed "s|CAUSAL|${causal}|g" | sed "s|H2G|${h2g}|g" | sed "s|PWD|${WD}|g" | sed "s|JOBNAME|${job_name}|g" > ${output_script}
sb ${output_script}
# cat script_variant_${variant}.sh
done
done
done
Exposure (QTL) meta file summary#
# Create a two-line table with strings
library(tidyverse)
for(TRAIT in c(2,5,10)){
for(CAUSAL in c(1,2,3,4,5)){
for(H2G in c(0.02, 0.03, 0.04, 0.05)){
path = c()
ntrait = TRAIT
ncausal = CAUSAL
h2g = H2G
cnt = 1
output_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/opera_input/causal_",
ncausal, "_h2g_", h2g, "/real_simulation_", ntrait, "opera/")
for(i in c(2:ntrait)){
path[cnt] = paste0(output_path, "trait_", i)
cnt = cnt + 1
}
data <- data.frame(Path = path)
# Print the data
print(data)
# Save the data to a CSV file
write_csv(data, paste0(output_path, "/qtl_list"), col_names = FALSE)
}
}
}
Subset the genome-wide SNP PLINK binary file#
Only include the variants in our selected non-overlapping regions for stage 1 prior estimation.
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 36:00:00
#SBATCH --mem=60000
#SBATCH -J LDextract
#SBATCH -o LDextract.%j.out
#SBATCH -e LDextract.%j.err
source ~/mamba_activate.sh
module load Singularity
cd /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_TRAITopera/opera_input/causal_CAUSAL_h2g_H2G/real_simulation_TRAITopera
plink2 --bfile /mnt/vast/hpc/csg/molecular_phenotype_calling/genotype_arch/ROSMAP_NIA_WGS.leftnorm.filtered --extract remain_snp.tsv --make-bed --out remain_snp_LD
base_sh="base_sh"
for trait in 2 5 10; do
for causal in 1 2 3 4 5; do
for h2g in 0.02 0.03 0.04 0.05; do
output_sh="trait_${trait}_causal_${causal}_h2g_${h2g}.sh"
cat ${base_sh}| sed "s|TRAIT|${trait}|g" | sed "s|CAUSAL|${causal}|g" | sed "s|H2G|${h2g}|g" > ${output_sh}
#sbatch ${output_sh}
done
done
done
Save genotype (LD) metadata in remain_snp_LD file#
# LD
# Create a two-line table with strings
library(tidyverse)
for(ntrait in c(2,5,10)){
for(ncausal in c(1,2,3,4,5)){
for(h2g in c(0.02, 0.03, 0.04, 0.05)){
cnt = 1
output_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/opera_input/causal_",
ncausal, "_h2g_", h2g, "/real_simulation_", ntrait, "opera/")
data <- paste0(output_path, "/remain_snp_LD")
# Print the data
print(data)
# Save the data to a CSV file
write_lines(data, paste0(output_path, "/genotype_data"))
}
}
}
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_1_h2g_0.02/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_1_h2g_0.03/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_1_h2g_0.04/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_1_h2g_0.05/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_2_h2g_0.02/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_2_h2g_0.03/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_2_h2g_0.04/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_2_h2g_0.05/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_3_h2g_0.02/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_3_h2g_0.03/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_3_h2g_0.04/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_3_h2g_0.05/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_4_h2g_0.02/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_4_h2g_0.03/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_4_h2g_0.04/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_4_h2g_0.05/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_5_h2g_0.02/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_5_h2g_0.03/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_5_h2g_0.04/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_5_h2g_0.05/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_1_h2g_0.02/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_1_h2g_0.03/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_1_h2g_0.04/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_1_h2g_0.05/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_2_h2g_0.02/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_2_h2g_0.03/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_2_h2g_0.04/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_2_h2g_0.05/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_3_h2g_0.02/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_3_h2g_0.03/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_3_h2g_0.04/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_3_h2g_0.05/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_4_h2g_0.02/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_4_h2g_0.03/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_4_h2g_0.04/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_4_h2g_0.05/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_5_h2g_0.02/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_5_h2g_0.03/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_5_h2g_0.04/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_5_h2g_0.05/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_1_h2g_0.02/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_1_h2g_0.03/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_1_h2g_0.04/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_1_h2g_0.05/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_2_h2g_0.02/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_2_h2g_0.03/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_2_h2g_0.04/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_2_h2g_0.05/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_3_h2g_0.02/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_3_h2g_0.03/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_3_h2g_0.04/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_3_h2g_0.05/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_4_h2g_0.02/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_4_h2g_0.03/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_4_h2g_0.04/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_4_h2g_0.05/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_5_h2g_0.02/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_5_h2g_0.03/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_5_h2g_0.04/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_5_h2g_0.05/real_simulation_10opera//remain_snp_LD"
Stage 1 OPERA (pi estimation)#
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 36:00:00
#SBATCH --mem=60000
#SBATCH -J opera1
#SBATCH -o opera1.%j.out
#SBATCH -e opera1.%j.err
source /home/hs3393/mamba_activate.sh
module load Singularity
cd /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_TRAITopera/opera_input/causal_CAUSAL_h2g_H2G/real_simulation_TRAITopera
/home/hs3393/opera_Linux/opera_Linux --besd-flist qtl_list --gwas-summary ./trait_1.ma \
--mbfile ./genotype_data --pi-wind 150 \
--estimate-pi --out pi_estimation
Stage 2 OPERA#
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 36:00:00
#SBATCH --mem=80000
#SBATCH -J opera2
#SBATCH -o opera2.%j.out
#SBATCH -e opera2.%j.err
source /home/hs3393/mamba_activate.sh
module load Singularity
cd /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_TRAITopera/opera_input/causal_CAUSAL_h2g_H2G/real_simulation_TRAITopera
/home/hs3393/opera_Linux/opera_Linux --besd-flist qtl_list --gwas-summary ./trait_1.ma \
--bfile ./remain_snp_LD \
--prior-pi-file ./pi_estimation.pi \
--prior-var-file ./pi_estimation.var \
--out ./opera_res --outcome-wind 1500
base_sh="base_sh"
for trait in 2 5 10; do
for causal in 1 2 3 4 5; do
for h2g in 0.02 0.03 0.04 0.05; do
output_sh="trait_${trait}_causal_${causal}_h2g_${h2g}.sh"
cat ${base_sh}| sed "s|TRAIT|${trait}|g" | sed "s|CAUSAL|${causal}|g" | sed "s|H2G|${h2g}|g" > ${output_sh}
sbatch ${output_sh}
done
done
done