Colocalization Result Summary#

Goal#

coloc_summary : Summarize the result based on the key four outputs to calculate FDR and power.

coloc_focal_summary: Summarize the result but only focus on the CoS including the focal trait (remove thoe CoS that is not colocalizing on focal trait).

Fit any coloclaization method generated by the notebooks in this repo. As long as they are summarized to have the elements:

  • coloc_trait: traits identified by ColocBoost as having shared genetic effects

  • coloc_set: sets of variants identified as causal

  • true_trait: traits with true causal relationships (ground truth)

  • true_variant: variants that are truly causal (ground truth)

Input#

File(s) of colocalization result.

Output#

A dataframe, each row is a summarization of one coloclization result.

Example output:

result = readRDS("../simulation_result/hyprcoloc/hyp_real_simulation_10trait/summary/result_summary.rds")
head(result)
A tibble: 6 × 12
total_causal_var_numberperfect_causal_var_numberpartial_causal_var_numbertrue_trait_numberpredict_trait_numbertotal_trait_numbertrue_set_numbertotal_set_numbersingle_set_numbermax_causalfileout_file
<int><dbl><dbl><dbl><int><dbl><dbl><int><int><int><chr><chr>
1111 5 5 51101/mnt/vast/hpc/csg/hs3393/cb_Mar/simulation_data/real_simulation_10trait/sample_0_real_simulation_1_ncausal_10_trait.rds/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_0_real_simulation_1_ncausal_10_trait_ntr_10_hypercoloc.rds
2211 5 5 81102/mnt/vast/hpc/csg/hs3393/cb_Mar/simulation_data/real_simulation_10trait/sample_0_real_simulation_2_ncausal_10_trait.rds/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_0_real_simulation_2_ncausal_10_trait_ntr_10_hypercoloc.rds
3302 4 4 82203/mnt/vast/hpc/csg/hs3393/cb_Mar/simulation_data/real_simulation_10trait/sample_0_real_simulation_3_ncausal_10_trait.rds/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_0_real_simulation_3_ncausal_10_trait_ntr_10_hypercoloc.rds
44031010173303/mnt/vast/hpc/csg/hs3393/cb_Mar/simulation_data/real_simulation_10trait/sample_0_real_simulation_4_ncausal_10_trait.rds/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_0_real_simulation_4_ncausal_10_trait_ntr_10_hypercoloc.rds
5111 4 4 41101/mnt/vast/hpc/csg/hs3393/cb_Mar/simulation_data/real_simulation_10trait/sample_1_real_simulation_1_ncausal_10_trait.rds/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_1_real_simulation_1_ncausal_10_trait_ntr_10_hypercoloc.rds
6212 6 6 72202/mnt/vast/hpc/csg/hs3393/cb_Mar/simulation_data/real_simulation_10trait/sample_1_real_simulation_2_ncausal_10_trait.rds/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_1_real_simulation_2_ncausal_10_trait_ntr_10_hypercoloc.rds

Summary code#

[coloc_summary]
# ======= Define parameters =======
parameter: folder = path
parameter: cwd = path("output")
parameter: job_size = 1
parameter: walltime = "50h"
parameter: mem = "30G"
parameter: numThreads = 1
parameter: container = ""

# ======= Workflow input/output =======
input: folder, group_by = 1
output: f'{cwd:a}/{_input[0]:b}_summary.rds'

task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'

# ======= Main R block =======
R: expand = '${ }', stdout = f"{_output:n}.stdout", stderr = f"{_output:n}.stderr", container = container

    library("tidyverse")

    # ======= Helper functions to evaluate colocalization =======

    # Evaluate for each true causal variant: how many true traits were recovered
    coloc_trait_vec = function(true_variant, true_trait, coloc_set, coloc_trait) {
        var_num = length(true_variant)
        my_vector <- numeric(var_num)

        for (i in 1:var_num) {
            for (j in 1:length(coloc_set)) {
                if (true_variant[i] %in% unlist(coloc_set[[j]])) {
                    difference = setdiff(coloc_trait[[j]], true_trait[[i]])
                    if (length(difference) > 0) {
                        my_vector[i] = max(my_vector[i], 0)
                    } else {
                        intersect_length = length(intersect(coloc_trait[[j]], true_trait[[i]]))
                        my_vector[i] = max(my_vector[i], intersect_length)
                    }
                }
            }
        }
        return(my_vector)
    }

    # Return number of true traits per true causal variant
    true_trait_vec = function(true_variant, true_trait) {  
        var_num = length(true_variant)
        my_vector <- numeric(var_num)
        for (i in 1:var_num) {
            my_vector[i] = length(true_trait[[i]])
        }
        return(my_vector)
    }

    # Evaluate for each detected coloc set: did it capture true traits correctly
    coloc_set_vec = function(true_variant, true_trait, coloc_set, coloc_trait) {
        set_num = length(coloc_set)
        my_vector <- numeric(set_num)

        for (i in 1:length(true_variant)) {
            for (j in 1:set_num) {
                if (true_variant[i] %in% unlist(coloc_set[[j]])) {
                    difference = setdiff(coloc_trait[[j]], true_trait[[i]])
                    if (length(difference) > 0) {
                        my_vector[j] = max(my_vector[j], 0)
                    } else {
                        my_vector[j] = max(my_vector[j], 1)
                    }
                }
            }
        }
        return(my_vector)
    }

    # Count number of perfect matches (full trait recovery)
    perfect_number = function(coloc_trait_vector, true_trait_vector) {
        perfect_match_number = 0
        for (i in 1:length(coloc_trait_vector)) {
            if (coloc_trait_vector[i] == true_trait_vector[i]) {
                perfect_match_number = perfect_match_number + 1
            }
        }
        return(perfect_match_number)
    }

    # Count number of partial matches (any recovery > 0)
    partial_number = function(coloc_trait_vector, true_trait_vector) {
        partial_match_number = 0
        for (i in 1:length(coloc_trait_vector)) {
            if (coloc_trait_vector[i] > 0) {
                partial_match_number = partial_match_number + 1
            }
        }
        return(partial_match_number)
    }

    # ======= Main colocboost_summary function =======

    colocboost_summary = function(true_variant, true_trait, coloc_set, coloc_trait) {
        if (length(coloc_set) == 0) {
            coloc_set = NULL
        }

        coloc_trait_vector = coloc_trait_vec(true_variant, true_trait, coloc_set, coloc_trait)
        true_trait_vector = true_trait_vec(true_variant, true_trait)
        coloc_set_vector = coloc_set_vec(true_variant, true_trait, coloc_set, coloc_trait)

        total_causal_var_number = length(true_variant)
        perfect_causal_var_number = perfect_number(coloc_trait_vector, true_trait_vector)
        partial_causal_var_number = partial_number(coloc_trait_vector, true_trait_vector)
        true_trait_number = sum(coloc_trait_vector)
        total_trait_number = sum(true_trait_vector)
        true_set_number = sum(coloc_set_vector)
        total_set_number = length(coloc_set_vector)

        set_sizes = c()
        for (i in 1:length(coloc_set)) {
            set_sizes[i] = length(coloc_set[[i]])
        }

        return(list(
            total_causal_var_number = total_causal_var_number,
            perfect_causal_var_number = perfect_causal_var_number,
            partial_causal_var_number = partial_causal_var_number,
            true_trait_number = true_trait_number,
            predict_trait_number = length(unlist(coloc_trait)),
            total_trait_number = total_trait_number,
            true_set_number = true_set_number,
            total_set_number = total_set_number,
            set_sizes = set_sizes
        ))
    }

    # ======= Read and summarize each .rds result file =======

    filenames <- list.files(${folder:r}, pattern = "*.rds$", full.names = TRUE, recursive = TRUE)

    read_and_extract_rds <- function(file_path) {
        rds_file <- readRDS(file_path)

        if (length(rds_file$coloc_set) == 0) {
            rds_file$coloc_set = NULL
        }

        data = colocboost_summary(
            rds_file$true_variant, 
            rds_file$true_trait, 
            rds_file$coloc_set, 
            rds_file$coloc_trait
        )

        table <- tibble(
            total_causal_var_number = data$total_causal_var_number,
            perfect_causal_var_number = data$perfect_causal_var_number,
            partial_causal_var_number = data$partial_causal_var_number,
            true_trait_number = data$true_trait_number,
            predict_trait_number = data$predict_trait_number,
            total_trait_number = data$total_trait_number,
            true_set_number = data$true_set_number,
            total_set_number = data$total_set_number,
            single_set_number = length(rds_file$csets),
            max_causal = max(rds_file$true_trait %>% unlist() %>% table()),
            file = rds_file$file,
            out_file = file_path
        )

        return(table)
    }

    # ======= Combine all summaries together =======

    combined_table <- map_dfr(filenames, read_and_extract_rds)

    # ======= Save final summary =======

    saveRDS(combined_table, ${_output:r})
[coloc_focal_summary]
# ======= Define parameters =======
## Only focus on results that include the target trait (trait 1)
parameter: folder = path
parameter: cwd = path("output")
parameter: job_size = 1
parameter: walltime = "50h"
parameter: mem = "30G"
parameter: numThreads = 1
parameter: container = ""

# ======= Workflow input/output =======
input: folder, group_by = 1
output: f'{cwd:a}/{_input[0]:b}_summary.rds'

task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'

# ======= Main R block =======
R: expand = '${ }', stdout = f"{_output:n}.stdout", stderr = f"{_output:n}.stderr", container = container

    library("tidyverse")
    
    coloc_trait_vec = function(true_variant, true_trait, coloc_set, coloc_trait){
    var_num = length(true_variant)
    ## initialize 0 vector for each true variant   
    my_vector <- numeric(var_num)
    # loop for every ture_causal_variant
    for(i in 1:var_num){
    # loop for every colocalization set result to see which true_snp is in the set
        for (j in 1:length(coloc_set)){
            if(true_variant[i] %in% unlist(coloc_set[[j]])){
                # if coloc trait has more than true trait, set as 0 (stringent)
                difference = setdiff(coloc_trait[[j]], true_trait[[i]])
                if(length(difference) > 0){
                    my_vector[i] = max(my_vector[i], 0)
                }else{
                    # check the number of elements that is intersection
                    intersect_length = length(intersect(coloc_trait[[j]], true_trait[[i]]))
                    my_vector[i] = max(my_vector[i], intersect_length)
                    } 
            
                }
                
            }
        }

        return(my_vector)
    }

    true_trait_vec = function(true_variant, true_trait){  
        var_num = length(true_variant)
        my_vector <- numeric(var_num)
        # how many traits are in each true trait set
        for(i in 1:var_num){
            my_vector[i] = length(true_trait[[i]])
        }
        return(my_vector)
    }

    coloc_set_vec = function(true_variant, true_trait, coloc_set, coloc_trait){
        set_num = length(coloc_set)
        ## initialize 0 vector for each coloc set   
        my_vector <- numeric(set_num)
        # loop for every ture_causal_variant
        for(i in 1:length(true_variant)){
        # loop for every colocalization set result to see which true_snp is in the set
            for (j in 1:set_num){
                # use the same index as the coloc_set to find corresponding coloc_traits for that set,
                # see how many traits are successfully captured
                if(true_variant[i] %in% unlist(coloc_set[[j]])){
                    difference = setdiff(coloc_trait[[j]], true_trait[[i]])
                    if(length(difference) > 0){
                        my_vector[j] = max(my_vector[j], 0)
                    }else{
                        my_vector[j] = max(my_vector[j], 1)
                    } 

                }

            }
        }

        return(my_vector)
    }

    perfect_number = function(coloc_trait_vector, true_trait_vector){
        perfect_match_number = 0
        for (i in 1:length(coloc_trait_vector)){
            if(coloc_trait_vector[i] == true_trait_vector[i]){
                perfect_match_number = perfect_match_number + 1
            }

        }
        return(perfect_match_number)

    }

    partial_number = function(coloc_trait_vector, true_trait_vector){
        partial_match_number = 0
        for (i in 1:length(coloc_trait_vector)){
            if(coloc_trait_vector[i] > 0){
                partial_match_number = partial_match_number + 1
            }

        }
        return(partial_match_number)

    }



    colocboost_summary = function(true_variant, true_trait, coloc_set, coloc_trait){
        if(length(coloc_set) == 0){
            coloc_set = NULL
        }

        total_causal_var_number = length(true_variant)

        if(total_causal_var_number == 0){
            return(list(total_causal_var_number = 0, 
                perfect_causal_var_number = 0, 
               partial_causal_var_number = 0,
                true_trait_number = 0, 
                predict_trait_number = length(unlist(coloc_trait)),
                total_trait_number = 0, 
                true_set_number = 0, 
                total_set_number = length(coloc_set)))
            }


        coloc_trait_vector = coloc_trait_vec(true_variant, true_trait, coloc_set, coloc_trait)

        true_trait_vector = true_trait_vec(true_variant, true_trait)

        coloc_set_vector = coloc_set_vec(true_variant, true_trait,coloc_set, coloc_trait)


    perfect_causal_var_number = perfect_number(coloc_trait_vector, true_trait_vector )
    partial_causal_var_number = partial_number(coloc_trait_vector, true_trait_vector)
    true_trait_number = sum(coloc_trait_vector)
    total_trait_number = sum(true_trait_vector)
    true_set_number = sum(coloc_set_vector)
    total_set_number = length(coloc_set_vector)
    set_sizes = c()
    for(i in c(1:length(coloc_set))){
        set_sizes[i] = length(coloc_set[[i]])
    }

    ### ture_trait : in the prediction, which are correct
    ### predict_trait : the total numbe rof predicted traits (some of the sets will be wrong!)

    return(list(total_causal_var_number = total_causal_var_number, 
                perfect_causal_var_number = perfect_causal_var_number, 
               partial_causal_var_number = partial_causal_var_number,
                true_trait_number = true_trait_number, 
                predict_trait_number = length(unlist(coloc_trait)),
                total_trait_number = total_trait_number, 
                true_set_number = true_set_number, 
                total_set_number = total_set_number,
                set_sizes = set_sizes))
    }


    # --- List all .rds files ---
    filenames <- list.files(${folder:r}, pattern = "*.rds$", full.names = TRUE, recursive = TRUE)

    # --- Define file reading and filtering function ---
    read_and_extract_rds <- function(file_path) {
        rds_file <- readRDS(file_path)

        # If no coloc sets detected at all, set to NULL
        if (length(rds_file$coloc_set) == 0) {
            rds_file$coloc_set = NULL
        }

        # --- Filter coloc sets that contain the target trait (trait 1) ---
        trait_with_target = sapply(rds_file$coloc_trait, function(x) any(x == 1))

        if (all(!trait_with_target)) {
            rds_file$coloc_trait = NULL
            rds_file$coloc_set = NULL
        } else {
            rds_file$coloc_trait = rds_file$coloc_trait[trait_with_target]
            rds_file$coloc_set = rds_file$coloc_set[trait_with_target]
        }

        # --- Filter true causal variants that involve the target trait ---
        trait_with_target = sapply(rds_file$true_trait, function(x) any(x == 1))

        if (all(!trait_with_target)) {
            rds_file$true_trait = NULL
            rds_file$true_variant = NULL
        } else {
            rds_file$true_trait = rds_file$true_trait[trait_with_target]
            rds_file$true_variant = rds_file$true_variant[trait_with_target]
        }

        # --- Summarize coloc performance ---
        data = colocboost_summary(
            rds_file$true_variant, 
            rds_file$true_trait, 
            rds_file$coloc_set, 
            rds_file$coloc_trait
        )

        # --- Store into tibble ---
        table <- tibble(
            total_causal_var_number = data$total_causal_var_number,
            perfect_causal_var_number = data$perfect_causal_var_number,
            partial_causal_var_number = data$partial_causal_var_number,
            true_trait_number = data$true_trait_number,
            predict_trait_number = data$predict_trait_number,
            total_trait_number = data$total_trait_number,
            true_set_number = data$true_set_number,
            total_set_number = data$total_set_number,
            single_set_number = length(rds_file$csets),
            max_causal = max(rds_file$true_trait %>% unlist() %>% table()),
            file = rds_file$file,
            out_file = file_path
        )

        return(table)
    }

    # ======= Apply the summary extraction over all files =======
    combined_table <- map_dfr(filenames, read_and_extract_rds)

    # ======= Save final summary table =======
    saveRDS(combined_table, ${_output:r})

Summary Bash commands#

ColocBoost summary#

## 2 trait summary

data_dir="/home/hs3393/cb_Mar/simulation_result/real_simulation_2trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary

cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err

source ~/mamba_activate.sh

sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
    --folder DATA_DIR/result \
    --cwd DATA_DIR/summary
EOF


base_script="summary_script"
output_script="summary.sh"
cat ${base_script}|  sed "s|DATA_DIR|${data_dir}|g"  > ${output_script}
sbatch ${output_script}

## 5 trait summary

data_dir="/home/hs3393/cb_Mar/simulation_result/real_simulation_5trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary

cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err

source ~/mamba_activate.sh

sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
    --folder DATA_DIR/result \
    --cwd DATA_DIR/summary
EOF


base_script="summary_script"
output_script="summary.sh"
cat ${base_script}|  sed "s|DATA_DIR|${data_dir}|g"  > ${output_script}
sbatch ${output_script}

## 10 trait summary

data_dir="/home/hs3393/cb_Mar/simulation_result/real_simulation_10trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary

cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err

source ~/mamba_activate.sh

sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
    --folder DATA_DIR/result \
    --cwd DATA_DIR/summary
EOF


base_script="summary_script"
output_script="summary.sh"
cat ${base_script}|  sed "s|DATA_DIR|${data_dir}|g"  > ${output_script}
sbatch ${output_script}

## 20 trait summary

data_dir="/home/hs3393/cb_Mar/simulation_result/real_simulation_20trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary

cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err

source ~/mamba_activate.sh

sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
    --folder DATA_DIR/result \
    --cwd DATA_DIR/summary
EOF


base_script="summary_script"
output_script="summary.sh"
cat ${base_script}|  sed "s|DATA_DIR|${data_dir}|g"  > ${output_script}
sbatch ${output_script}

Hyprcoloc summary#

data_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_2trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary

cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err

source ~/mamba_activate.sh

sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
    --folder DATA_DIR/result \
    --cwd DATA_DIR/summary
EOF


base_script="summary_script"
output_script="summary.sh"
cat ${base_script}|  sed "s|DATA_DIR|${data_dir}|g"  > ${output_script}
sbatch ${output_script}


data_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_5trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary

cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err

source ~/mamba_activate.sh

sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
    --folder DATA_DIR/result \
    --cwd DATA_DIR/summary
EOF


base_script="summary_script"
output_script="summary.sh"
cat ${base_script}|  sed "s|DATA_DIR|${data_dir}|g"  > ${output_script}
sbatch ${output_script}

data_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary

cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err

source ~/mamba_activate.sh

sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
    --folder DATA_DIR/result \
    --cwd DATA_DIR/summary
EOF


base_script="summary_script"
output_script="summary.sh"
cat ${base_script}|  sed "s|DATA_DIR|${data_dir}|g"  > ${output_script}
sbatch ${output_script}

data_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_20trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary

cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err

source ~/mamba_activate.sh

sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
    --folder DATA_DIR/result \
    --cwd DATA_DIR/summary
EOF


base_script="summary_script"
output_script="summary.sh"
cat ${base_script}|  sed "s|DATA_DIR|${data_dir}|g"  > ${output_script}
sbatch ${output_script}

MOLOC summary#

# moloc 

data_dir="/home/hs3393/cb_Mar/simulation_result/moloc/moloc_real_simulation_2trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary

cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 8:00:00
#SBATCH --mem=30000
#SBATCH -J sum
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err

source ~/mamba_activate.sh

sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
    --folder DATA_DIR/result \
    --cwd DATA_DIR/summary
EOF


base_script="summary_script"
output_script="summary.sh"
cat ${base_script}|  sed "s|DATA_DIR|${data_dir}|g"  > ${output_script}
sbatch ${output_script}

SuSiE coloc summary#

data_dir="/home/hs3393/cb_Mar/simulation_result/susie_coloc/susie_coloc_real_simulation_2trait"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary

cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 8:00:00
#SBATCH --mem=30000
#SBATCH -J sum
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err

source ~/mamba_activate.sh

sos run /home/hs3393/cb_Mar/simulation_code/4_Result_Summary.ipynb coloc_summary \
    --folder DATA_DIR/result \
    --cwd DATA_DIR/summary
EOF


base_script="summary_script"
output_script="summary.sh"
cat ${base_script}|  sed "s|DATA_DIR|${data_dir}|g"  > ${output_script}
sbatch ${output_script}