OPERA running#

Goal#

After simulating the phenotypes, we will summarize them into summary statistics format, making them compatible with OPERA, and compare with ColocBoost targeted version.

Input#

Simulated individual level data.

Choose non-overlapping regions from 1,287 regions used in other simulations#

library(tidyverse)
select_gene = read_tsv("/home/hs3393/cloud_colocalization/simulation_data/selected_genes_genotype/selected_gene_tb.tsv")

select_gene = select_gene%>% mutate(index = str_replace(TADB_index, "TADB_", ""))  %>%mutate(index = as.numeric(index)) %>% arrange(index) 

chrom_str = c()
start_str = c()
end_str = c()
gene_str = c()
cnt = 1

for(i in 1:nrow(select_gene)){
    chrom = select_gene[["#chr"]][i]
    start =  select_gene[["gene_cis_start"]][i]
    end = select_gene[["gene_cis_end"]][i]
    gene = select_gene[["gene_id"]][i]
    if(i == 1){
     chrom_str = c(chrom_str, chrom)
     start_str = c(start_str, start)
     end_str = c(end_str, end)
      gene_str = c(gene_str, gene)
        cnt = cnt + 1
    }else{
        if(chrom != chrom_str[cnt - 1]){
            chrom_str = c(chrom_str, chrom)
             start_str = c(start_str, start)
            end_str = c(end_str, end)
            gene_str = c(gene_str, gene)
            cnt = cnt + 1
        }else{
            if(start > end_str[cnt -1]){
                chrom_str = c(chrom_str, chrom)
                 start_str = c(start_str, start)
                 end_str = c(end_str, end)
                gene_str = c(gene_str, gene)
                cnt = cnt + 1
            } 
        }
    }
}

Based on causal variant distribution, mix the result by proportion#

Save the selected files in another folder.

## Obtained from true colocalization configuration

trait_2 = c(2177, 233, 26)
trait_2 = trait_2 / sum(trait_2)

trait_5 = c(1831, 258, 41, 9 )
trait_5 = trait_5 / sum(trait_5)

trait_10 = c(3175, 805, 149, 45, 10)
trait_10 = trait_10 / sum(trait_10)
# mix the 

set.seed(100)
total_indices <- 495
proportions <- trait_2

# Calculate the exact number of indices for each group
group_sizes <- floor(total_indices * proportions)

# Adjust for any rounding errors
group_sizes[1] <- group_sizes[1] + (total_indices - sum(group_sizes))

# Create a vector of group assignments
groups <- rep(1:3, times = group_sizes)


groups <- sample(groups)

# Create a list to store the indices for each group
partitioned_indices <- lapply(1:3, function(i) which(groups == i) - 1)

for(h2g in c(0.02, 0.03, 0.04, 0.05)){
    path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/h2g_", h2g, "_mix/")
    dir.create(path, showWarnings = F, recursive = T)
    for(i in partitioned_indices[[1]]){
        file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/causal_1_h2g_", 
                           h2g, "/real_simulation_2opera/sample_", 
                           i, "_real_simulation_1_ncausal_2_trait.rds" )
        file.copy(from = file_path, to = path, overwrite = TRUE)
    } 
    
    for(i in partitioned_indices[[2]]){
        file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/causal_2_h2g_", 
                           h2g, "/real_simulation_2opera/sample_", 
                           i, "_real_simulation_2_ncausal_2_trait.rds" )
        file.copy(from = file_path, to = path, overwrite = TRUE)
    } 
    
    for(i in partitioned_indices[[3]]){
        file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/causal_3_h2g_", 
                           h2g, "/real_simulation_2opera/sample_", 
                           i, "_real_simulation_3_ncausal_2_trait.rds" )
        file.copy(from = file_path, to = path, overwrite = TRUE)
    } 

}



set.seed(100)
total_indices <- 495
proportions <- trait_5

# Calculate the exact number of indices for each group
group_sizes <- floor(total_indices * proportions)

# Adjust for any rounding errors
group_sizes[1] <- group_sizes[1] + (total_indices - sum(group_sizes))

# Create a vector of group assignments
groups <- rep(1:4, times = group_sizes)


groups <- sample(groups)

# Create a list to store the indices for each group
partitioned_indices <- lapply(1:4, function(i) which(groups == i) - 1)

for(h2g in c(0.02, 0.03, 0.04, 0.05)){
    path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/h2g_", h2g, "_mix/")
    dir.create(path, showWarnings = F, recursive = T)
    for(i in partitioned_indices[[1]]){
        file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/causal_1_h2g_", 
                           h2g, "/real_simulation_5opera/sample_", 
                           i, "_real_simulation_1_ncausal_5_trait.rds" )
        file.copy(from = file_path, to = path, overwrite = TRUE)
    } 
    
    for(i in partitioned_indices[[2]]){
        file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/causal_2_h2g_", 
                           h2g, "/real_simulation_5opera/sample_", 
                           i, "_real_simulation_2_ncausal_5_trait.rds" )
        file.copy(from = file_path, to = path, overwrite = TRUE)
    } 
    
    for(i in partitioned_indices[[3]]){
        file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/causal_3_h2g_", 
                           h2g, "/real_simulation_5opera/sample_", 
                           i, "_real_simulation_3_ncausal_5_trait.rds" )
        file.copy(from = file_path, to = path, overwrite = TRUE)
    } 
    
        for(i in partitioned_indices[[4]]){
        file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/causal_4_h2g_", 
                           h2g, "/real_simulation_5opera/sample_", 
                           i, "_real_simulation_4_ncausal_5_trait.rds" )
        file.copy(from = file_path, to = path, overwrite = TRUE)
    } 

}

set.seed(100)
total_indices <- 495
proportions <- trait_10

# Calculate the exact number of indices for each group
group_sizes <- floor(total_indices * proportions)

# Adjust for any rounding errors
group_sizes[1] <- group_sizes[1] + (total_indices - sum(group_sizes))

# Create a vector of group assignments
groups <- rep(1:5, times = group_sizes)


groups <- sample(groups)

# Create a list to store the indices for each group
partitioned_indices <- lapply(1:5, function(i) which(groups == i) - 1)

for(h2g in c(0.02, 0.03, 0.04, 0.05)){
    path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/h2g_", h2g, "_mix/")
    dir.create(path, showWarnings = F, recursive = T)
    for(i in partitioned_indices[[1]]){
        file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/causal_1_h2g_", 
                           h2g, "/real_simulation_10opera/sample_", 
                           i, "_real_simulation_1_ncausal_10_trait.rds" )
        file.copy(from = file_path, to = path, overwrite = TRUE)
    } 
    
    for(i in partitioned_indices[[2]]){
        file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/causal_2_h2g_", 
                           h2g, "/real_simulation_10opera/sample_", 
                           i, "_real_simulation_2_ncausal_10_trait.rds" )
        file.copy(from = file_path, to = path, overwrite = TRUE)
    } 
    
    for(i in partitioned_indices[[3]]){
        file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/causal_3_h2g_", 
                           h2g, "/real_simulation_10opera/sample_", 
                           i, "_real_simulation_3_ncausal_10_trait.rds" )
        file.copy(from = file_path, to = path, overwrite = TRUE)
    } 
    
        for(i in partitioned_indices[[4]]){
        file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/causal_4_h2g_", 
                           h2g, "/real_simulation_10opera/sample_", 
                           i, "_real_simulation_4_ncausal_10_trait.rds" )
        file.copy(from = file_path, to = path, overwrite = TRUE)
    } 
    
            for(i in partitioned_indices[[5]]){
        file_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/causal_5_h2g_", 
                           h2g, "/real_simulation_10opera/sample_", 
                           i, "_real_simulation_5_ncausal_10_trait.rds" )
        file.copy(from = file_path, to = path, overwrite = TRUE)
    } 

}

Transform individual level simulation into sumstat#

library("MASS")
    library("plink2R")
    library("dplyr")
    library("readr")
    library("tidyverse")
    # source some functions to read matrix and inpute the missing data
    source("~/cloud_colocalization/simulation_code/simulate_linreg.R")
    source("~/cloud_colocalization/simulation_code/misc.R")
    calculate_sumstat = function(X, Y){
        Beta = c()
        se = c()
        Freq = c()
        p = c()
        for(mm in 1:ncol(X)){
            rr <- susieR::univariate_regression(X[,mm], Y)
            Beta[mm] = rr$betahat
            se[mm] <- rr$sebetahat
            Freq[mm] = sum(X[,mm])/(2*nrow(X))
            p[mm] = 2 * (1 - pnorm(abs(rr$betahat / rr$sebetahat)))
        }
            tb = tibble(
            SNP = colnames(X),
            Beta = Beta,
            se = se,
            Freq = Freq, p = p) %>% 
        separate(SNP, into = c("Chr", "Bp", "A2", "A1"), sep = "[:_]", remove = FALSE) %>%
        select(Chr, SNP, Bp, A1, A2, Freq, Beta, se, p) %>%
        mutate(Chr = str_replace(Chr, "chr", "")) %>%
        mutate(Chr = as.numeric(Chr), Bp = as.numeric(Bp))
        return(tb)
    }
cnt = 1
ntrait = TRAIT
h2g = H2G

path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/h2g_", h2g, "_mix")
files = list.files(path, full.names = T, pattern = ".rds")
data <- rep(list(NULL), length(files))

for(file in files){
    rds = readRDS(file)
    X = rds$X
    Y = rds$Y
    trait = list()
    trait[[1]] = calculate_sumstat(X, Y[,1, drop = T]) %>% rename(freq = Freq, b = Beta) %>% mutate(N = 1160) %>% select(SNP, A1, A2, freq, b, se, p, N)
    for(i in 2:ncol(Y)){
      trait[[i]] = calculate_sumstat(X, Y[,i, drop = T])
    }
    index =  str_extract(file, "(?<=sample_)\\d+")
    data = list()
    data[["trait"]] = trait
    data$index = index
    data$gwas_var_num = length(rds$variant[[1]])
    dir.create(paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/mix_sumstat/h2g_", h2g, "/real_simulation_", ntrait, "opera"), showWarnings = F, recursive = T)
    saveRDS(data, paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/mix_sumstat/h2g_", h2g, "/real_simulation_", ntrait, "opera/index_", index, ".rds"))
    cnt = cnt + 1
    
}

Example output#

result = readRDS("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/mix_sumstat/h2g_0.02/real_simulation_2opera/index_240.rds")
str(result)
head(result$trait[[1]])
List of 3
 $ trait       :List of 2
  ..$ :Classes ‘tbl_df’, ‘tbl’ and 'data.frame':	6791 obs. of  8 variables:
  .. ..$ SNP : chr [1:6791] "chr12:108093914_C_T" "chr12:108093950_A_G" "chr12:108094333_C_G" "chr12:108094913_A_G" ...
  .. ..$ A1  : chr [1:6791] "T" "G" "G" "G" ...
  .. ..$ A2  : chr [1:6791] "C" "A" "C" "A" ...
  .. ..$ freq: num [1:6791] 0.0924 0.3747 0.3504 0.0928 0.2207 ...
  .. ..$ b   : num [1:6791] -0.4608 0.1953 -0.0277 -0.4819 -0.443 ...
  .. ..$ se  : num [1:6791] 0.314 0.181 0.191 0.314 0.212 ...
  .. ..$ p   : num [1:6791] 0.1425 0.2811 0.885 0.1244 0.0369 ...
  .. ..$ N   : num [1:6791] 1160 1160 1160 1160 1160 1160 1160 1160 1160 1160 ...
  ..$ :Classes ‘tbl_df’, ‘tbl’ and 'data.frame':	6791 obs. of  9 variables:
  .. ..$ Chr : num [1:6791] 12 12 12 12 12 12 12 12 12 12 ...
  .. ..$ SNP : chr [1:6791] "chr12:108093914_C_T" "chr12:108093950_A_G" "chr12:108094333_C_G" "chr12:108094913_A_G" ...
  .. ..$ Bp  : num [1:6791] 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 ...
  .. ..$ A1  : chr [1:6791] "T" "G" "G" "G" ...
  .. ..$ A2  : chr [1:6791] "C" "A" "C" "A" ...
  .. ..$ Freq: num [1:6791] 0.0924 0.3747 0.3504 0.0928 0.2207 ...
  .. ..$ Beta: num [1:6791] -0.1844 -0.1309 0.128 -0.1795 -0.0441 ...
  .. ..$ se  : num [1:6791] 0.203 0.117 0.123 0.203 0.137 ...
  .. ..$ p   : num [1:6791] 0.364 0.263 0.299 0.376 0.748 ...
 $ index       : chr "240"
 $ gwas_var_num: int 1
A tibble: 6 × 8
SNPA1A2freqbsepN
<chr><chr><chr><dbl><dbl><dbl><dbl><dbl>
1chr12:108093914_C_TTC0.09236774-0.460784340.31419030.142491121160
2chr12:108093950_A_GGA0.37467476 0.195335300.18121710.281074921160
3chr12:108094333_C_GGC0.35039029-0.027651130.19117750.884998371160
4chr12:108094913_A_GGA0.09280139-0.481909140.31363590.124409931160
5chr12:108095069_C_TTC0.22072853-0.443016350.21227750.036891051160
6chr12:108095158_C_TTC0.25628794 0.194426790.20862720.351370651160
base_sh="base_script"
# Create the base_script file and write the bash code into it
cat << 'EOF' > ${base_sh}
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 78:00:00
#SBATCH --mem=30000
#SBATCH -J extract
#SBATCH -o /home/hs3393/cloud_colocalization/submission_script/sumstat_generate/log/extract.%j.out
#SBATCH -e /home/hs3393/cloud_colocalization/submission_script/sumstat_generate/log/extract.%j.err

source ~/mamba_activate.sh
module load Singularity

Rscript /home/hs3393/cb_simulation/simulation_data/simulation_opera/opera_code/sumstat_generate/trait_TRAIT_h2g_H2G.R
EOF

base_R="base_R"

for trait in 2 5 10; do
    for h2g in 0.02 0.03 0.04 0.05; do
        output_R="trait_${trait}_h2g_${h2g}.R"
        output_sh="trait_${trait}_h2g_${h2g}.sh"
        cat ${base_R}| sed "s|TRAIT|${trait}|g" | sed "s|H2G|${h2g}|g"   > ${output_R}
        cat ${base_sh}| sed "s|TRAIT|${trait}|g" | sed "s|H2G|${h2g}|g"  > ${output_sh}
        sbatch ${output_sh}
    done
done

Transform sumstat into .esd format and make the .flist summary file#

Also, GWAS trait is saved into .ma format.

library(tidyverse)
cnt = 1
ntrait = TRAIT
h2g = H2G

cwd = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/mix_sumstat/h2g_", h2g, "/real_simulation_", ntrait, "opera/")


dir.create(paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/opera_input_mix/", 
                     "h2g_", h2g, "/real_simulation_", ntrait, "opera/"), showWarnings = F, recursive = T)
output_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/opera_input_mix/", 
                     "h2g_", h2g, "/real_simulation_", ntrait, "opera/")

gwas = list()
path = vector("list", ntrait)
chr = c()
distance = c()
probe_id = c()


cnt = 1
for(file in list.files(cwd, pattern = ".rds")){
    file_path = paste0(cwd, file)
    rds = readRDS(file_path)
    tad_index = as.numeric(sub(".*index_([0-9]+).*", "\\1", file_path))
    gwas[[cnt]] = rds$trait[[1]]
    distance[cnt] = floor((max(as.numeric(rds$trait[[2]]$Bp)) + min(as.numeric(rds$trait[[2]]$Bp))) / 2)
    chr[cnt] = unique(rds$trait[[2]]$Chr)
    probe_id[cnt] = paste0("Region_", tad_index)
    for(i in 2:ntrait){
        path[[i-1]][cnt] = paste0(output_path, "/region_", tad_index, "_trait_", i, ".esd")
        rds$trait[[i]] %>% write_tsv(path[[i-1]][cnt])
    }
    cnt = cnt + 1
}

for(i in 2:ntrait){
    tibble(Chr = chr, 
           ProbeID = paste0(probe_id, "_", i), 
           GeneticDistance = 0,
           ProbeBp = distance, 
           Gene = paste0(probe_id, "_", i),  
           Orientation = NA,
           PathOfEsd = path[[i -1]]) %>%  
        write_tsv(paste0(output_path,
                         "/trait_", i, ".flist"))
}

bind_rows(gwas) %>% 
    write_tsv(paste0(output_path, 
                     "/trait_1.ma"))

bind_rows(gwas) %>% 
    select(SNP) %>% 
    write_tsv(paste0(output_path,
                     "/remain_snp.tsv"), col_names = FALSE)

Example output#

library(tidyverse)
result = read_delim("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.02/real_simulation_2opera/region_274_trait_2.esd")
head(result)
Rows: 5504 Columns: 9
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (3): SNP, A1, A2
dbl (6): Chr, Bp, Freq, Beta, se, p

 Use `spec()` to retrieve the full column specification for this data.
 Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(tidyverse)
result = read_delim("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.02/real_simulation_2opera/trait_2.flist")
head(result)
Rows: 495 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (3): ProbeID, Gene, PathOfEsd
dbl (3): Chr, GeneticDistance, ProbeBp
lgl (1): Orientation

 Use `spec()` to retrieve the full column specification for this data.
 Specify the column types or set `show_col_types = FALSE` to quiet this message.
A tibble: 6 × 7
ChrProbeIDGeneticDistanceProbeBpGeneOrientationPathOfEsd
<dbl><chr><dbl><dbl><chr><lgl><chr>
8Region_0_2 0 17497391Region_0_2 NA/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.02/real_simulation_2opera//region_0_trait_2.esd
7Region_1_2 0107320728Region_1_2 NA/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.02/real_simulation_2opera//region_1_trait_2.esd
5Region_10_2 0113896770Region_10_2 NA/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.02/real_simulation_2opera//region_10_trait_2.esd
20Region_100_20 61926166Region_100_2NA/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.02/real_simulation_2opera//region_100_trait_2.esd
20Region_101_20 35475951Region_101_2NA/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.02/real_simulation_2opera//region_101_trait_2.esd
1Region_102_20147899365Region_102_2NA/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.02/real_simulation_2opera//region_102_trait_2.esd
base_sh="base_script"
# Create the base_script file and write the bash code into it
cat << 'EOF' > ${base_sh}
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 78:00:00
#SBATCH --mem=50000
#SBATCH -J extract
#SBATCH -o /home/hs3393/cloud_colocalization/submission_script/opera_sumstat_extract/log/config.%j.out
#SBATCH -e /home/hs3393/cloud_colocalization/submission_script/opera_sumstat_extract/log/config.%j.err

source ~/mamba_activate.sh
module load Singularity

Rscript /home/hs3393/cb_simulation/simulation_data/simulation_opera/opera_code/opera_sumstat_extract/trait_TRAIT_h2g_H2G.R
EOF

base_R="base_R"

for trait in 2 5 10; do
    for h2g in 0.02 0.03 0.04 0.05; do
        output_R="trait_${trait}_h2g_${h2g}.R"
        output_sh="trait_${trait}_h2g_${h2g}.sh"
        cat ${base_R}| sed "s|TRAIT|${trait}|g" | sed "s|H2G|${h2g}|g"   > ${output_R}
        cat ${base_sh}| sed "s|TRAIT|${trait}|g" | sed "s|H2G|${h2g}|g"  > ${output_sh}
        sbatch ${output_sh}
    done
done

Use SMR to convert .esd files to .bsed files as OPERA input#

job_name="besd"
WD="/home/hs3393/cb_simulation/simulation_data/simulation_opera/opera_code/make_besd"


mkdir -p ${WD}/code
mkdir -p ${WD}/log

traits=("2" "5" "10")
h2gs=("0.02" "0.03" "0.04" "0.05")

cd ${WD}/code

cat << 'EOF' > base_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J JOBNAME
#SBATCH -o PWD/log/JOBNAME."%j".out
#SBATCH -e PWD/log/JOBNAME."%j".err


source ~/mamba_activate.sh
module load Singularity

cd /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_TRAITopera/opera_input_mix/h2g_H2G/real_simulation_TRAITopera

for trait_num in {2..10}; do
    ~/smr_heidi/SMR/smr-1.3.1-linux-x86_64/smr-1.3.1 \
    --eqtl-flist ./trait_${trait_num}.flist \
    --make-besd \
    --cis-wind 3000 \ # extended since our cis region can be larger than 2Mb
    --make-besd-dense \ # keep all SNP info
    --out ./trait_${trait_num}
done

EOF
            
base_script="base_script"
for trait in "${traits[@]}"; do
      for h2g in "${h2gs[@]}"; do  
            output_script="script_trait_${trait}_h2g_${h2g}.sh"
            cat ${base_script} | sed "s|TRAIT|${trait}|g" |  sed "s|H2G|${h2g}|g" | sed "s|PWD|${WD}|g" | sed "s|JOBNAME|${job_name}|g"  > ${output_script}
            sb ${output_script}
            # cat script_variant_${variant}.sh
    done
done

Exposure (QTL) meta file summary#

# Create a two-line table with strings
library(tidyverse)
for(TRAIT in c(2,5,10)){
        for(H2G in c(0.02, 0.03, 0.04, 0.05)){
        path = c()
        ntrait = TRAIT
        h2g = H2G
        cnt = 1
        output_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/opera_input_mix/h2g_", h2g, "/real_simulation_", ntrait, "opera/")
        for(i in c(2:ntrait)){
            path[cnt] = paste0(output_path, "trait_", i)
            cnt = cnt + 1
        }
        data <- data.frame(Path = path)

        # Print the data
        print(data)

        # Save the data to a CSV file
        write_csv(data, paste0(output_path, "/qtl_list"), col_names = FALSE)
        }
}

Example output#

cat /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.02/real_simulation_5opera/qtl_list
/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.02/real_simulation_5opera/trait_2
/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.02/real_simulation_5opera/trait_3
/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.02/real_simulation_5opera/trait_4
/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.02/real_simulation_5opera/trait_5

Save genotype (LD) metadata in remain_snp_LD file#

# LD
# Create a two-line table with strings
library(tidyverse)
for(ntrait in c(2,5,10)){
        for(h2g in c(0.02, 0.03, 0.04, 0.05)){

            cnt = 1
            output_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/opera_input_mix/h2g_", h2g, "/real_simulation_", ntrait, "opera/")
            data <- paste0(output_path, "/remain_snp_LD")

            # Print the data
            print(data)

            # Save the data to a CSV file
            write_lines(data, paste0(output_path, "/genotype_data"))
    }
}
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.02/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.03/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.04/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input_mix/h2g_0.05/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.02/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.03/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.04/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.05/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input_mix/h2g_0.02/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input_mix/h2g_0.03/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input_mix/h2g_0.04/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input_mix/h2g_0.05/real_simulation_10opera//remain_snp_LD"

Stage 1 OPERA (pi estimation)#

#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 36:00:00
#SBATCH --mem=60000
#SBATCH -J opera1
#SBATCH -o opera1.%j.out
#SBATCH -e opera1.%j.err

source /home/hs3393/mamba_activate.sh
module load Singularity

cd /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_TRAITopera/opera_input_mix/h2g_H2G/real_simulation_TRAITopera
/home/hs3393/opera_Linux/opera_Linux --besd-flist qtl_list --gwas-summary ./trait_1.ma \
--mbfile ./genotype_data --pi-wind 150 \
--estimate-pi --out pi_estimation
base_sh="base_sh"
for trait in 2 5 10; do
        for h2g in 0.02 0.03 0.04 0.05; do
            output_sh="trait_${trait}_h2g_${h2g}.sh"
            cat ${base_sh}| sed "s|TRAIT|${trait}|g"  | sed "s|H2G|${h2g}|g"  > ${output_sh}
            sbatch ${output_sh}
    done
done

Example output#

cat /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.05/real_simulation_5opera/pi_estimation.pi
Posteriors	Pi1(0:0:0:0)	Pi2(0:0:0:1)	Pi3(0:0:1:0)	Pi4(0:0:1:1)	Pi5(0:1:0:0)	Pi6(0:1:0:1)	Pi7(0:1:1:0)	Pi8(0:1:1:1)	Pi9(1:0:0:0)	Pi10(1:0:0:1)	Pi11(1:0:1:0)	Pi12(1:0:1:1)	Pi13(1:1:0:0)	Pi14(1:1:0:1)	Pi15(1:1:1:0)	Pi16(1:1:1:1)
Mean	0.275201	0.00228085	0.00233336	0.00224573	0.00231277	0.00220141	0.00223646	0.00227284	0.00246007	0.00209981	0.00291004	0.00263845	0.00362954	0.00242097	0.00327664	0.68948
SD	0.0643589	0.00712061	0.00685669	0.00697502	0.00686388	0.00684095	0.00673787	0.00699348	0.00695986	0.00649404	0.00799288	0.00752632	0.00940491	0.00681977	0.00880387	0.0667714

Stage 2 OPERA#

#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 36:00:00
#SBATCH --mem=80000
#SBATCH -J opera2
#SBATCH -o opera2.%j.out
#SBATCH -e opera2.%j.err

source /home/hs3393/mamba_activate.sh
module load Singularity

cd /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_TRAITopera/opera_input_mix/h2g_H2G/real_simulation_TRAITopera

/home/hs3393/opera_Linux/opera_Linux --besd-flist qtl_list --gwas-summary ./trait_1.ma \
    --bfile ./remain_snp_LD \
    --prior-pi-file ./pi_estimation.pi \
    --prior-var-file ./pi_estimation.var \
    --out ./opera_res --outcome-wind 1500

base_sh="base_sh"
for trait in 2 5 10; do
        for h2g in 0.02 0.03 0.04 0.05; do
            output_sh="trait_${trait}_h2g_${h2g}.sh"
            cat ${base_sh}| sed "s|TRAIT|${trait}|g"  | sed "s|H2G|${h2g}|g"  > ${output_sh}
            sbatch ${output_sh}
    done
done

Example output#

head /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input_mix/h2g_0.05/real_simulation_5opera/opera_res_4_expos_assoc.ppa
Chr	Expo1_ID	Expo1_bp	Expo2_ID	Expo2_bp	Expo3_ID	Expo3_bp	Expo4_ID	Expo4_bp	PPA(1,2,3,4)	p_HEIDI(1,2,3,4)
10	Region_241_2	37025329	Region_241_3	37025329	Region_241_4	37025329	Region_241_5	37025329	0.999967	5.250539e-01
11	Region_107_2	11927391	Region_107_3	11927391	Region_107_4	11927391	Region_107_5	11927391	0.999991	8.772566e-01
11	Region_108_2	18057498	Region_108_3	18057498	Region_108_4	18057498	Region_108_5	18057498	1	6.443910e-01
13	Region_43_2	54814185	Region_43_3	54814185	Region_43_4	54814185	Region_43_5	54814185	0.999916	5.845443e-01
13	Region_449_2	30572631	Region_449_3	30572631	Region_449_4	30572631	Region_449_5	30572631	0.999933	3.243127e-01
18	Region_41_2	61335205	Region_41_3	61335205	Region_41_4	61335205	Region_41_5	61335205	0.999989	8.673183e-01
1	Region_124_2	84179558	Region_124_3	84179558	Region_124_4	84179558	Region_124_5	84179558	0.999975	4.870884e-01
2	Region_73_2	59140327	Region_73_3	59140327	Region_73_4	59140327	Region_73_5	59140327	0.999987	9.353629e-01
3	Region_246_2	108165863	Region_246_3	108165863	Region_246_4	108165863	Region_246_5	108165863	0.999999	3.673531e-01

Previous version: not mix causal variant by configuration#

For each causal variant number get a separate result.

Convert individual level simulation results to sumstat#

library("MASS")
    library("plink2R")
    library("dplyr")
    library("readr")
    library("tidyverse")
    # source some functions to read matrix and inpute the missing data
    source("~/cloud_colocalization/simulation_code/simulate_linreg.R")
    source("~/cloud_colocalization/simulation_code/misc.R")
    calculate_sumstat = function(X, Y){
        Beta = c()
        se = c()
        Freq = c()
        p = c()
        for(mm in 1:ncol(X)){
            rr <- susieR::univariate_regression(X[,mm], Y)
            Beta[mm] = rr$betahat
            se[mm] <- rr$sebetahat
            Freq[mm] = sum(X[,mm])/(2*nrow(X))
            p[mm] = 2 * (1 - pnorm(abs(rr$betahat / rr$sebetahat)))
        }
            tb = tibble(
            SNP = colnames(X),
            Beta = Beta,
            se = se,
            Freq = Freq, p = p) %>% 
        separate(SNP, into = c("Chr", "Bp", "A2", "A1"), sep = "[:_]", remove = FALSE) %>%
        select(Chr, SNP, Bp, A1, A2, Freq, Beta, se, p) %>%
        mutate(Chr = str_replace(Chr, "chr", "")) %>%
        mutate(Chr = as.numeric(Chr), Bp = as.numeric(Bp))
        return(tb)
    }
cnt = 1
ntrait = TRAIT
ncausal = CAUSAL
h2g = H2G

path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/causal_", ncausal, "_h2g_", h2g, "/real_simulation_", ntrait, "opera")
files = list.files(path, full.names = T, pattern = ".rds")
data <- rep(list(NULL), length(files))

for(file in files){
    rds = readRDS(file)
    X = rds$X
    Y = rds$Y
    trait = list()
    trait[[1]] = calculate_sumstat(X, Y[,1, drop = T]) %>% rename(freq = Freq, b = Beta) %>% mutate(N = 1160) %>% select(SNP, A1, A2, freq, b, se, p, N)
    for(i in 2:ncol(Y)){
      trait[[i]] = calculate_sumstat(X, Y[,i, drop = T])
    }
    index =  str_extract(file, "(?<=sample_)\\d+")
    data = list()
    data[["trait"]] = trait
    data$index = index
    data$gwas_var_num = length(rds$variant[[1]])
    dir.create(paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/sumstat/causal_", ncausal, "_h2g_", h2g, "/real_simulation_", ntrait, "opera"), showWarnings = F, recursive = T)
    saveRDS(data, paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/sumstat/causal_", ncausal, "_h2g_", h2g, "/real_simulation_", ntrait, "opera/index_", index, "_ncausal_", ncausal,".rds"))
    cnt = cnt + 1
    
}

Submission script#

base_sh="base_script"
# Create the base_script file and write the bash code into it
cat << 'EOF' > ${base_sh}
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 78:00:00
#SBATCH --mem=30000
#SBATCH -J extract
#SBATCH -o /home/hs3393/cloud_colocalization/submission_script/sumstat_generate/log/extract.%j.out
#SBATCH -e /home/hs3393/cloud_colocalization/submission_script/sumstat_generate/log/extract.%j.err

source ~/mamba_activate.sh
module load Singularity

Rscript /home/hs3393/cb_simulation/simulation_data/simulation_opera/opera_code/sumstat_generate/trait_TRAIT_causal_CAUSAL_h2g_H2G.R
EOF

base_R="base_R"

for trait in 2 5 10; do
    for causal in 1 2 3 4 5; do
        for h2g in 0.02 0.03 0.04 0.05; do
        output_R="trait_${trait}_causal_${causal}_h2g_${h2g}.R"
        output_sh="trait_${trait}_causal_${causal}_h2g_${h2g}.sh"
        cat ${base_R}| sed "s|TRAIT|${trait}|g" | sed "s|CAUSAL|${causal}|g" | sed "s|H2G|${h2g}|g"   > ${output_R}
        cat ${base_sh}| sed "s|TRAIT|${trait}|g" | sed "s|CAUSAL|${causal}|g"  | sed "s|H2G|${h2g}|g"  > ${output_sh}
        sbatch ${output_sh}
        done
    done
done
library(tidyverse)
cnt = 1
ntrait = TRAIT
ncausal = CAUSAL
h2g = H2G

cwd = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/sumstat/causal_", 
             ncausal, "_h2g_", h2g, "/real_simulation_", ntrait, "opera/")


dir.create(paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/opera_input/causal_", 
             ncausal, "_h2g_", h2g, "/real_simulation_", ntrait, "opera"), showWarnings = F, recursive = T)
output_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/opera_input/causal_", 
             ncausal, "_h2g_", h2g, "/real_simulation_", ntrait, "opera/")

gwas = list()
path = vector("list", ntrait)
chr = c()
distance = c()
probe_id = c()


cnt = 1
for(file in list.files(cwd, pattern = paste0(ncausal, ".rds"))){
    file_path = paste0(cwd, file)
    rds = readRDS(file_path)
    tad_index = as.numeric(sub(".*index_([0-9]+).*", "\\1", file_path))
    gwas[[cnt]] = rds$trait[[1]]
    distance[cnt] = floor((max(as.numeric(rds$trait[[2]]$Bp)) + min(as.numeric(rds$trait[[2]]$Bp))) / 2)
    chr[cnt] = unique(rds$trait[[2]]$Chr)
    probe_id[cnt] = paste0("Region_", tad_index)
    for(i in 2:ntrait){
        path[[i-1]][cnt] = paste0(output_path, "/region_", tad_index, "_trait_", i, ".esd")
        rds$trait[[i]] %>% write_tsv(path[[i-1]][cnt])
    }
    cnt = cnt + 1
}

for(i in 2:ntrait){
    tibble(Chr = chr, 
           ProbeID = paste0(probe_id, "_", i), 
           GeneticDistance = 0,
           ProbeBp = distance, 
           Gene = paste0(probe_id, "_", i),  
           Orientation = NA,
           PathOfEsd = path[[i -1]]) %>%  
        write_tsv(paste0(output_path,
                         "/trait_", i, ".flist"))
}

bind_rows(gwas) %>% 
    write_tsv(paste0(output_path, 
                     "/trait_1.ma"))

bind_rows(gwas) %>% 
    select(SNP) %>% 
    write_tsv(paste0(output_path,
                     "/remain_snp.tsv"), col_names = FALSE)

Submission script#

base_sh="base_script"
# Create the base_script file and write the bash code into it
cat << 'EOF' > ${base_sh}
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 78:00:00
#SBATCH --mem=50000
#SBATCH -J extract
#SBATCH -o /home/hs3393/cloud_colocalization/submission_script/opera_sumstat_extract/log/config.%j.out
#SBATCH -e /home/hs3393/cloud_colocalization/submission_script/opera_sumstat_extract/log/config.%j.err

source ~/mamba_activate.sh
module load Singularity

Rscript /home/hs3393/cb_simulation/simulation_data/simulation_opera/opera_code/opera_sumstat_extract/trait_TRAIT_causal_CAUSAL_h2g_H2G.R
EOF

base_R="base_R"

for trait in 2 5 10; do
    for causal in 1 2 3 4 5; do
        for h2g in 0.02 0.03 0.04 0.05; do
        output_R="trait_${trait}_causal_${causal}_h2g_${h2g}.R"
        output_sh="trait_${trait}_causal_${causal}_h2g_${h2g}.sh"
        cat ${base_R}| sed "s|TRAIT|${trait}|g" | sed "s|CAUSAL|${causal}|g" | sed "s|H2G|${h2g}|g"   > ${output_R}
        cat ${base_sh}| sed "s|TRAIT|${trait}|g" | sed "s|CAUSAL|${causal}|g"  | sed "s|H2G|${h2g}|g"  > ${output_sh}
        sbatch ${output_sh}
        done
    done
done

Use SMR to convert .esd files to .bsed files as OPERA input#

job_name="besd"
WD="/home/hs3393/cb_simulation/simulation_data/simulation_opera/opera_code/make_besd"


mkdir -p ${WD}/code
mkdir -p ${WD}/log

traits=("2" "5" "10")
causals=("1" "2" "3" "4" "5")
h2gs=("0.02" "0.03" "0.04" "0.05")

cd ${WD}/code

cat << 'EOF' > base_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J JOBNAME
#SBATCH -o PWD/log/JOBNAME."%j".out
#SBATCH -e PWD/log/JOBNAME."%j".err


source ~/mamba_activate.sh
module load Singularity

cd /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_TRAITopera/opera_input/causal_CAUSAL_h2g_H2G/real_simulation_TRAITopera

for trait_num in {2..10}; do
    ~/smr_heidi/SMR/smr-1.3.1-linux-x86_64/smr-1.3.1 \
    --eqtl-flist ./trait_${trait_num}.flist \
    --make-besd \
    --cis-wind 3000 \ # enlarged since our cis region is larger than 2M bases
    --make-besd-dense \ # keep all SNP info
    --out ./trait_${trait_num}
done

EOF
            
base_script="base_script"
for trait in "${traits[@]}"; do
    for causal in "${causals[@]}"; do
      for h2g in "${h2gs[@]}"; do  
            output_script="script_n_causal_${causal}_trait_${trait}_h2g_${h2g}.sh"
            cat ${base_script} | sed "s|TRAIT|${trait}|g" | sed "s|CAUSAL|${causal}|g" | sed "s|H2G|${h2g}|g" | sed "s|PWD|${WD}|g" | sed "s|JOBNAME|${job_name}|g"  > ${output_script}
            sb ${output_script}
            # cat script_variant_${variant}.sh
        done
    done
done

Exposure (QTL) meta file summary#

# Create a two-line table with strings
library(tidyverse)
for(TRAIT in c(2,5,10)){
    for(CAUSAL in c(1,2,3,4,5)){
        for(H2G in c(0.02, 0.03, 0.04, 0.05)){
        path = c()
        ntrait = TRAIT
        ncausal = CAUSAL
        h2g = H2G
        cnt = 1
        output_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/opera_input/causal_", 
         ncausal, "_h2g_", h2g, "/real_simulation_", ntrait, "opera/")
        for(i in c(2:ntrait)){
            path[cnt] = paste0(output_path, "trait_", i)
            cnt = cnt + 1
        }
        data <- data.frame(Path = path)

        # Print the data
        print(data)

        # Save the data to a CSV file
        write_csv(data, paste0(output_path, "/qtl_list"), col_names = FALSE)
        }
    }
}

Subset the genome-wide SNP PLINK binary file#

Only include the variants in our selected non-overlapping regions for stage 1 prior estimation.

#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 36:00:00
#SBATCH --mem=60000
#SBATCH -J LDextract
#SBATCH -o LDextract.%j.out
#SBATCH -e LDextract.%j.err

source ~/mamba_activate.sh
module load Singularity
cd /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_TRAITopera/opera_input/causal_CAUSAL_h2g_H2G/real_simulation_TRAITopera

plink2 --bfile /mnt/vast/hpc/csg/molecular_phenotype_calling/genotype_arch/ROSMAP_NIA_WGS.leftnorm.filtered --extract remain_snp.tsv --make-bed --out remain_snp_LD

base_sh="base_sh"
for trait in 2 5 10; do
    for causal in 1 2 3 4 5; do
        for h2g in 0.02 0.03 0.04 0.05; do
            output_sh="trait_${trait}_causal_${causal}_h2g_${h2g}.sh"
            cat ${base_sh}| sed "s|TRAIT|${trait}|g" | sed "s|CAUSAL|${causal}|g"  | sed "s|H2G|${h2g}|g"  > ${output_sh}
            #sbatch ${output_sh}
        done
    done
done

Save genotype (LD) metadata in remain_snp_LD file#

# LD
# Create a two-line table with strings
library(tidyverse)
for(ntrait in c(2,5,10)){
    for(ncausal in c(1,2,3,4,5)){
        for(h2g in c(0.02, 0.03, 0.04, 0.05)){

            cnt = 1
            output_path = paste0("/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_", ntrait, "opera/opera_input/causal_", 
             ncausal, "_h2g_", h2g, "/real_simulation_", ntrait, "opera/")
            data <- paste0(output_path, "/remain_snp_LD")

            # Print the data
            print(data)

            # Save the data to a CSV file
            write_lines(data, paste0(output_path, "/genotype_data"))
        }
    }
}
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_1_h2g_0.02/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_1_h2g_0.03/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_1_h2g_0.04/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_1_h2g_0.05/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_2_h2g_0.02/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_2_h2g_0.03/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_2_h2g_0.04/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_2_h2g_0.05/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_3_h2g_0.02/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_3_h2g_0.03/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_3_h2g_0.04/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_3_h2g_0.05/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_4_h2g_0.02/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_4_h2g_0.03/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_4_h2g_0.04/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_4_h2g_0.05/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_5_h2g_0.02/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_5_h2g_0.03/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_5_h2g_0.04/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_2opera/opera_input/causal_5_h2g_0.05/real_simulation_2opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_1_h2g_0.02/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_1_h2g_0.03/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_1_h2g_0.04/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_1_h2g_0.05/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_2_h2g_0.02/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_2_h2g_0.03/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_2_h2g_0.04/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_2_h2g_0.05/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_3_h2g_0.02/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_3_h2g_0.03/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_3_h2g_0.04/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_3_h2g_0.05/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_4_h2g_0.02/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_4_h2g_0.03/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_4_h2g_0.04/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_4_h2g_0.05/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_5_h2g_0.02/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_5_h2g_0.03/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_5_h2g_0.04/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_5opera/opera_input/causal_5_h2g_0.05/real_simulation_5opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_1_h2g_0.02/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_1_h2g_0.03/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_1_h2g_0.04/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_1_h2g_0.05/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_2_h2g_0.02/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_2_h2g_0.03/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_2_h2g_0.04/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_2_h2g_0.05/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_3_h2g_0.02/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_3_h2g_0.03/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_3_h2g_0.04/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_3_h2g_0.05/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_4_h2g_0.02/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_4_h2g_0.03/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_4_h2g_0.04/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_4_h2g_0.05/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_5_h2g_0.02/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_5_h2g_0.03/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_5_h2g_0.04/real_simulation_10opera//remain_snp_LD"
[1] "/home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_10opera/opera_input/causal_5_h2g_0.05/real_simulation_10opera//remain_snp_LD"

Stage 1 OPERA (pi estimation)#

#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 36:00:00
#SBATCH --mem=60000
#SBATCH -J opera1
#SBATCH -o opera1.%j.out
#SBATCH -e opera1.%j.err

source /home/hs3393/mamba_activate.sh
module load Singularity

cd /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_TRAITopera/opera_input/causal_CAUSAL_h2g_H2G/real_simulation_TRAITopera
/home/hs3393/opera_Linux/opera_Linux --besd-flist qtl_list --gwas-summary ./trait_1.ma \
--mbfile ./genotype_data --pi-wind 150 \
--estimate-pi --out pi_estimation

Stage 2 OPERA#

#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 36:00:00
#SBATCH --mem=80000
#SBATCH -J opera2
#SBATCH -o opera2.%j.out
#SBATCH -e opera2.%j.err

source /home/hs3393/mamba_activate.sh
module load Singularity

cd /home/hs3393/cb_simulation/simulation_data/simulation_opera/real_simulation_TRAITopera/opera_input/causal_CAUSAL_h2g_H2G/real_simulation_TRAITopera

/home/hs3393/opera_Linux/opera_Linux --besd-flist qtl_list --gwas-summary ./trait_1.ma \
    --bfile ./remain_snp_LD \
    --prior-pi-file ./pi_estimation.pi \
    --prior-var-file ./pi_estimation.var \
    --out ./opera_res --outcome-wind 1500

base_sh="base_sh"
for trait in 2 5 10; do
    for causal in 1 2 3 4 5; do
        for h2g in 0.02 0.03 0.04 0.05; do
            output_sh="trait_${trait}_causal_${causal}_h2g_${h2g}.sh"
            cat ${base_sh}| sed "s|TRAIT|${trait}|g" | sed "s|CAUSAL|${causal}|g"  | sed "s|H2G|${h2g}|g"  > ${output_sh}
            sbatch ${output_sh}
        done
    done
done