Colocalization Result Summary#
Goal#
coloc_summary
: Summarize the result based on the key four outputs to calculate FDR and power.
coloc_focal_summary
: Summarize the result but only focus on the CoS including the focal trait (remove thoe CoS that is not colocalizing on focal trait).
Fit any coloclaization method generated by the notebooks in this repo. As long as they are summarized to have the elements:
coloc_trait
: traits identified by ColocBoost as having shared genetic effectscoloc_set
: sets of variants identified as causaltrue_trait
: traits with true causal relationships (ground truth)true_variant
: variants that are truly causal (ground truth)
Input#
File(s) of colocalization result.
Output#
A dataframe, each row is a summarization of one coloclization result.
Example output:
result = readRDS("../simulation_result/hyprcoloc/hyp_real_simulation_10trait/summary/result_summary.rds")
head(result)
total_causal_var_number | perfect_causal_var_number | partial_causal_var_number | true_trait_number | predict_trait_number | total_trait_number | true_set_number | total_set_number | single_set_number | max_causal | file | out_file | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
<int> | <dbl> | <dbl> | <dbl> | <int> | <dbl> | <dbl> | <int> | <int> | <int> | <chr> | <chr> | |
1 | 1 | 1 | 1 | 5 | 5 | 5 | 1 | 1 | 0 | 1 | /mnt/vast/hpc/csg/hs3393/cb_Mar/simulation_data/real_simulation_10trait/sample_0_real_simulation_1_ncausal_10_trait.rds | /home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_0_real_simulation_1_ncausal_10_trait_ntr_10_hypercoloc.rds |
2 | 2 | 1 | 1 | 5 | 5 | 8 | 1 | 1 | 0 | 2 | /mnt/vast/hpc/csg/hs3393/cb_Mar/simulation_data/real_simulation_10trait/sample_0_real_simulation_2_ncausal_10_trait.rds | /home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_0_real_simulation_2_ncausal_10_trait_ntr_10_hypercoloc.rds |
3 | 3 | 0 | 2 | 4 | 4 | 8 | 2 | 2 | 0 | 3 | /mnt/vast/hpc/csg/hs3393/cb_Mar/simulation_data/real_simulation_10trait/sample_0_real_simulation_3_ncausal_10_trait.rds | /home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_0_real_simulation_3_ncausal_10_trait_ntr_10_hypercoloc.rds |
4 | 4 | 0 | 3 | 10 | 10 | 17 | 3 | 3 | 0 | 3 | /mnt/vast/hpc/csg/hs3393/cb_Mar/simulation_data/real_simulation_10trait/sample_0_real_simulation_4_ncausal_10_trait.rds | /home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_0_real_simulation_4_ncausal_10_trait_ntr_10_hypercoloc.rds |
5 | 1 | 1 | 1 | 4 | 4 | 4 | 1 | 1 | 0 | 1 | /mnt/vast/hpc/csg/hs3393/cb_Mar/simulation_data/real_simulation_10trait/sample_1_real_simulation_1_ncausal_10_trait.rds | /home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_1_real_simulation_1_ncausal_10_trait_ntr_10_hypercoloc.rds |
6 | 2 | 1 | 2 | 6 | 6 | 7 | 2 | 2 | 0 | 2 | /mnt/vast/hpc/csg/hs3393/cb_Mar/simulation_data/real_simulation_10trait/sample_1_real_simulation_2_ncausal_10_trait.rds | /home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_1_real_simulation_2_ncausal_10_trait_ntr_10_hypercoloc.rds |
Summary code#
[coloc_summary]
# ======= Define parameters =======
parameter: folder = path
parameter: cwd = path("output")
parameter: job_size = 1
parameter: walltime = "50h"
parameter: mem = "30G"
parameter: numThreads = 1
parameter: container = ""
# ======= Workflow input/output =======
input: folder, group_by = 1
output: f'{cwd:a}/{_input[0]:b}_summary.rds'
task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'
# ======= Main R block =======
R: expand = '${ }', stdout = f"{_output:n}.stdout", stderr = f"{_output:n}.stderr", container = container
library("tidyverse")
# ======= Helper functions to evaluate colocalization =======
# Evaluate for each true causal variant: how many true traits were recovered
coloc_trait_vec = function(true_variant, true_trait, coloc_set, coloc_trait) {
var_num = length(true_variant)
my_vector <- numeric(var_num)
for (i in 1:var_num) {
for (j in 1:length(coloc_set)) {
if (true_variant[i] %in% unlist(coloc_set[[j]])) {
difference = setdiff(coloc_trait[[j]], true_trait[[i]])
if (length(difference) > 0) {
my_vector[i] = max(my_vector[i], 0)
} else {
intersect_length = length(intersect(coloc_trait[[j]], true_trait[[i]]))
my_vector[i] = max(my_vector[i], intersect_length)
}
}
}
}
return(my_vector)
}
# Return number of true traits per true causal variant
true_trait_vec = function(true_variant, true_trait) {
var_num = length(true_variant)
my_vector <- numeric(var_num)
for (i in 1:var_num) {
my_vector[i] = length(true_trait[[i]])
}
return(my_vector)
}
# Evaluate for each detected coloc set: did it capture true traits correctly
coloc_set_vec = function(true_variant, true_trait, coloc_set, coloc_trait) {
set_num = length(coloc_set)
my_vector <- numeric(set_num)
for (i in 1:length(true_variant)) {
for (j in 1:set_num) {
if (true_variant[i] %in% unlist(coloc_set[[j]])) {
difference = setdiff(coloc_trait[[j]], true_trait[[i]])
if (length(difference) > 0) {
my_vector[j] = max(my_vector[j], 0)
} else {
my_vector[j] = max(my_vector[j], 1)
}
}
}
}
return(my_vector)
}
# Count number of perfect matches (full trait recovery)
perfect_number = function(coloc_trait_vector, true_trait_vector) {
perfect_match_number = 0
for (i in 1:length(coloc_trait_vector)) {
if (coloc_trait_vector[i] == true_trait_vector[i]) {
perfect_match_number = perfect_match_number + 1
}
}
return(perfect_match_number)
}
# Count number of partial matches (any recovery > 0)
partial_number = function(coloc_trait_vector, true_trait_vector) {
partial_match_number = 0
for (i in 1:length(coloc_trait_vector)) {
if (coloc_trait_vector[i] > 0) {
partial_match_number = partial_match_number + 1
}
}
return(partial_match_number)
}
# ======= Main colocboost_summary function =======
colocboost_summary = function(true_variant, true_trait, coloc_set, coloc_trait) {
if (length(coloc_set) == 0) {
coloc_set = NULL
}
coloc_trait_vector = coloc_trait_vec(true_variant, true_trait, coloc_set, coloc_trait)
true_trait_vector = true_trait_vec(true_variant, true_trait)
coloc_set_vector = coloc_set_vec(true_variant, true_trait, coloc_set, coloc_trait)
total_causal_var_number = length(true_variant)
perfect_causal_var_number = perfect_number(coloc_trait_vector, true_trait_vector)
partial_causal_var_number = partial_number(coloc_trait_vector, true_trait_vector)
true_trait_number = sum(coloc_trait_vector)
total_trait_number = sum(true_trait_vector)
true_set_number = sum(coloc_set_vector)
total_set_number = length(coloc_set_vector)
set_sizes = c()
for (i in 1:length(coloc_set)) {
set_sizes[i] = length(coloc_set[[i]])
}
return(list(
total_causal_var_number = total_causal_var_number,
perfect_causal_var_number = perfect_causal_var_number,
partial_causal_var_number = partial_causal_var_number,
true_trait_number = true_trait_number,
predict_trait_number = length(unlist(coloc_trait)),
total_trait_number = total_trait_number,
true_set_number = true_set_number,
total_set_number = total_set_number,
set_sizes = set_sizes
))
}
# ======= Read and summarize each .rds result file =======
filenames <- list.files(${folder:r}, pattern = "*.rds$", full.names = TRUE, recursive = TRUE)
read_and_extract_rds <- function(file_path) {
rds_file <- readRDS(file_path)
if (length(rds_file$coloc_set) == 0) {
rds_file$coloc_set = NULL
}
data = colocboost_summary(
rds_file$true_variant,
rds_file$true_trait,
rds_file$coloc_set,
rds_file$coloc_trait
)
table <- tibble(
total_causal_var_number = data$total_causal_var_number,
perfect_causal_var_number = data$perfect_causal_var_number,
partial_causal_var_number = data$partial_causal_var_number,
true_trait_number = data$true_trait_number,
predict_trait_number = data$predict_trait_number,
total_trait_number = data$total_trait_number,
true_set_number = data$true_set_number,
total_set_number = data$total_set_number,
single_set_number = length(rds_file$csets),
max_causal = max(rds_file$true_trait %>% unlist() %>% table()),
file = rds_file$file,
out_file = file_path
)
return(table)
}
# ======= Combine all summaries together =======
combined_table <- map_dfr(filenames, read_and_extract_rds)
# ======= Save final summary =======
saveRDS(combined_table, ${_output:r})
[coloc_focal_summary]
# ======= Define parameters =======
## Only focus on results that include the target trait (trait 1)
parameter: folder = path
parameter: cwd = path("output")
parameter: job_size = 1
parameter: walltime = "50h"
parameter: mem = "30G"
parameter: numThreads = 1
parameter: container = ""
# ======= Workflow input/output =======
input: folder, group_by = 1
output: f'{cwd:a}/{_input[0]:b}_summary.rds'
task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'
# ======= Main R block =======
R: expand = '${ }', stdout = f"{_output:n}.stdout", stderr = f"{_output:n}.stderr", container = container
library("tidyverse")
coloc_trait_vec = function(true_variant, true_trait, coloc_set, coloc_trait){
var_num = length(true_variant)
## initialize 0 vector for each true variant
my_vector <- numeric(var_num)
# loop for every ture_causal_variant
for(i in 1:var_num){
# loop for every colocalization set result to see which true_snp is in the set
for (j in 1:length(coloc_set)){
if(true_variant[i] %in% unlist(coloc_set[[j]])){
# if coloc trait has more than true trait, set as 0 (stringent)
difference = setdiff(coloc_trait[[j]], true_trait[[i]])
if(length(difference) > 0){
my_vector[i] = max(my_vector[i], 0)
}else{
# check the number of elements that is intersection
intersect_length = length(intersect(coloc_trait[[j]], true_trait[[i]]))
my_vector[i] = max(my_vector[i], intersect_length)
}
}
}
}
return(my_vector)
}
true_trait_vec = function(true_variant, true_trait){
var_num = length(true_variant)
my_vector <- numeric(var_num)
# how many traits are in each true trait set
for(i in 1:var_num){
my_vector[i] = length(true_trait[[i]])
}
return(my_vector)
}
coloc_set_vec = function(true_variant, true_trait, coloc_set, coloc_trait){
set_num = length(coloc_set)
## initialize 0 vector for each coloc set
my_vector <- numeric(set_num)
# loop for every ture_causal_variant
for(i in 1:length(true_variant)){
# loop for every colocalization set result to see which true_snp is in the set
for (j in 1:set_num){
# use the same index as the coloc_set to find corresponding coloc_traits for that set,
# see how many traits are successfully captured
if(true_variant[i] %in% unlist(coloc_set[[j]])){
difference = setdiff(coloc_trait[[j]], true_trait[[i]])
if(length(difference) > 0){
my_vector[j] = max(my_vector[j], 0)
}else{
my_vector[j] = max(my_vector[j], 1)
}
}
}
}
return(my_vector)
}
perfect_number = function(coloc_trait_vector, true_trait_vector){
perfect_match_number = 0
for (i in 1:length(coloc_trait_vector)){
if(coloc_trait_vector[i] == true_trait_vector[i]){
perfect_match_number = perfect_match_number + 1
}
}
return(perfect_match_number)
}
partial_number = function(coloc_trait_vector, true_trait_vector){
partial_match_number = 0
for (i in 1:length(coloc_trait_vector)){
if(coloc_trait_vector[i] > 0){
partial_match_number = partial_match_number + 1
}
}
return(partial_match_number)
}
colocboost_summary = function(true_variant, true_trait, coloc_set, coloc_trait){
if(length(coloc_set) == 0){
coloc_set = NULL
}
total_causal_var_number = length(true_variant)
if(total_causal_var_number == 0){
return(list(total_causal_var_number = 0,
perfect_causal_var_number = 0,
partial_causal_var_number = 0,
true_trait_number = 0,
predict_trait_number = length(unlist(coloc_trait)),
total_trait_number = 0,
true_set_number = 0,
total_set_number = length(coloc_set)))
}
coloc_trait_vector = coloc_trait_vec(true_variant, true_trait, coloc_set, coloc_trait)
true_trait_vector = true_trait_vec(true_variant, true_trait)
coloc_set_vector = coloc_set_vec(true_variant, true_trait,coloc_set, coloc_trait)
perfect_causal_var_number = perfect_number(coloc_trait_vector, true_trait_vector )
partial_causal_var_number = partial_number(coloc_trait_vector, true_trait_vector)
true_trait_number = sum(coloc_trait_vector)
total_trait_number = sum(true_trait_vector)
true_set_number = sum(coloc_set_vector)
total_set_number = length(coloc_set_vector)
set_sizes = c()
for(i in c(1:length(coloc_set))){
set_sizes[i] = length(coloc_set[[i]])
}
### ture_trait : in the prediction, which are correct
### predict_trait : the total numbe rof predicted traits (some of the sets will be wrong!)
return(list(total_causal_var_number = total_causal_var_number,
perfect_causal_var_number = perfect_causal_var_number,
partial_causal_var_number = partial_causal_var_number,
true_trait_number = true_trait_number,
predict_trait_number = length(unlist(coloc_trait)),
total_trait_number = total_trait_number,
true_set_number = true_set_number,
total_set_number = total_set_number,
set_sizes = set_sizes))
}
# --- List all .rds files ---
filenames <- list.files(${folder:r}, pattern = "*.rds$", full.names = TRUE, recursive = TRUE)
# --- Define file reading and filtering function ---
read_and_extract_rds <- function(file_path) {
rds_file <- readRDS(file_path)
# If no coloc sets detected at all, set to NULL
if (length(rds_file$coloc_set) == 0) {
rds_file$coloc_set = NULL
}
# --- Filter coloc sets that contain the target trait (trait 1) ---
trait_with_target = sapply(rds_file$coloc_trait, function(x) any(x == 1))
if (all(!trait_with_target)) {
rds_file$coloc_trait = NULL
rds_file$coloc_set = NULL
} else {
rds_file$coloc_trait = rds_file$coloc_trait[trait_with_target]
rds_file$coloc_set = rds_file$coloc_set[trait_with_target]
}
# --- Filter true causal variants that involve the target trait ---
trait_with_target = sapply(rds_file$true_trait, function(x) any(x == 1))
if (all(!trait_with_target)) {
rds_file$true_trait = NULL
rds_file$true_variant = NULL
} else {
rds_file$true_trait = rds_file$true_trait[trait_with_target]
rds_file$true_variant = rds_file$true_variant[trait_with_target]
}
# --- Summarize coloc performance ---
data = colocboost_summary(
rds_file$true_variant,
rds_file$true_trait,
rds_file$coloc_set,
rds_file$coloc_trait
)
# --- Store into tibble ---
table <- tibble(
total_causal_var_number = data$total_causal_var_number,
perfect_causal_var_number = data$perfect_causal_var_number,
partial_causal_var_number = data$partial_causal_var_number,
true_trait_number = data$true_trait_number,
predict_trait_number = data$predict_trait_number,
total_trait_number = data$total_trait_number,
true_set_number = data$true_set_number,
total_set_number = data$total_set_number,
single_set_number = length(rds_file$csets),
max_causal = max(rds_file$true_trait %>% unlist() %>% table()),
file = rds_file$file,
out_file = file_path
)
return(table)
}
# ======= Apply the summary extraction over all files =======
combined_table <- map_dfr(filenames, read_and_extract_rds)
# ======= Save final summary table =======
saveRDS(combined_table, ${_output:r})
Summary Bash commands#
ColocBoost summary#
## 2 trait summary
data_dir="/home/hs3393/cb_Mar/simulation_result/real_simulation_2trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
## 5 trait summary
data_dir="/home/hs3393/cb_Mar/simulation_result/real_simulation_5trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
## 10 trait summary
data_dir="/home/hs3393/cb_Mar/simulation_result/real_simulation_10trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
## 20 trait summary
data_dir="/home/hs3393/cb_Mar/simulation_result/real_simulation_20trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
Hyprcoloc summary#
data_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_2trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
data_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_5trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
data_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
data_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_20trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
MOLOC summary#
# moloc
data_dir="/home/hs3393/cb_Mar/simulation_result/moloc/moloc_real_simulation_2trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 8:00:00
#SBATCH --mem=30000
#SBATCH -J sum
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
SuSiE coloc summary#
data_dir="/home/hs3393/cb_Mar/simulation_result/susie_coloc/susie_coloc_real_simulation_2trait"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 8:00:00
#SBATCH --mem=30000
#SBATCH -J sum
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}