Colocalization result summary#
Goal#
Summarize the result based on the key four outputs to calculate FDR and power.
Fit any coloclaization method generated by the notebooks in this repo. As long as they are summarized to have the elements: coloc_trait, coloc_set true_trait and true_variant.
Input#
File(s) of colocalization result.
Output#
A dataframe, each row is a summarization of one finemapping result.
Example output:
result = readRDS("/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/summary/result_summary.rds")
head(result)
total_causal_var_number | perfect_causal_var_number | partial_causal_var_number | true_trait_number | predict_trait_number | total_trait_number | true_set_number | total_set_number | single_set_number | max_causal | file | out_file | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
<int> | <dbl> | <dbl> | <dbl> | <int> | <dbl> | <dbl> | <int> | <int> | <int> | <chr> | <chr> | |
1 | 1 | 1 | 1 | 5 | 5 | 5 | 1 | 1 | 0 | 1 | /mnt/vast/hpc/csg/hs3393/cb_Mar/simulation_data/real_simulation_10trait/sample_0_real_simulation_1_ncausal_10_trait.rds | /home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_0_real_simulation_1_ncausal_10_trait_ntr_10_hypercoloc.rds |
2 | 2 | 1 | 1 | 5 | 5 | 8 | 1 | 1 | 0 | 2 | /mnt/vast/hpc/csg/hs3393/cb_Mar/simulation_data/real_simulation_10trait/sample_0_real_simulation_2_ncausal_10_trait.rds | /home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_0_real_simulation_2_ncausal_10_trait_ntr_10_hypercoloc.rds |
3 | 3 | 0 | 2 | 4 | 4 | 8 | 2 | 2 | 0 | 3 | /mnt/vast/hpc/csg/hs3393/cb_Mar/simulation_data/real_simulation_10trait/sample_0_real_simulation_3_ncausal_10_trait.rds | /home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_0_real_simulation_3_ncausal_10_trait_ntr_10_hypercoloc.rds |
4 | 4 | 0 | 3 | 10 | 10 | 17 | 3 | 3 | 0 | 3 | /mnt/vast/hpc/csg/hs3393/cb_Mar/simulation_data/real_simulation_10trait/sample_0_real_simulation_4_ncausal_10_trait.rds | /home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_0_real_simulation_4_ncausal_10_trait_ntr_10_hypercoloc.rds |
5 | 1 | 1 | 1 | 4 | 4 | 4 | 1 | 1 | 0 | 1 | /mnt/vast/hpc/csg/hs3393/cb_Mar/simulation_data/real_simulation_10trait/sample_1_real_simulation_1_ncausal_10_trait.rds | /home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_1_real_simulation_1_ncausal_10_trait_ntr_10_hypercoloc.rds |
6 | 2 | 1 | 2 | 6 | 6 | 7 | 2 | 2 | 0 | 2 | /mnt/vast/hpc/csg/hs3393/cb_Mar/simulation_data/real_simulation_10trait/sample_1_real_simulation_2_ncausal_10_trait.rds | /home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_1_real_simulation_2_ncausal_10_trait_ntr_10_hypercoloc.rds |
Summary code#
[coloc_summary]
parameter: folder = path
parameter: cwd = path("output")
parameter: job_size = 1
parameter: walltime = "50h"
parameter: mem = "30G"
parameter: numThreads = 1
parameter: container = ""
input: folder , group_by = 1
output: f'{cwd:a}/{_input[0]:b}_summary.rds'
task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'
R: expand = '${ }', stdout = f"{_output:n}.stdout", stderr = f"{_output:n}.stderr", container = container
library("tidyverse")
source("/home/hs3393/cloud_colocalization/simulation_code/colocboost_summary.r")
filenames <- list.files(${folder:r}, pattern="*.rds$", full.names=TRUE, recursive = T)
read_and_extract_rds <- function(file_path) {
rds_file <- readRDS(file_path) # Read the RDS file
if(length(rds_file$coloc_set) == 0){
rds_file$coloc_set = NULL
}
data = colocboost_summary(rds_file$true_variant,
rds_file$true_trait, rds_file$coloc_set, rds_file$coloc_trait)
table <- tibble(total_causal_var_number = data$total_causal_var_number,
perfect_causal_var_number = data$perfect_causal_var_number,
partial_causal_var_number = data$partial_causal_var_number,
true_trait_number = data$true_trait_number,
predict_trait_number = data$predict_trait_number,
total_trait_number = data$total_trait_number,
true_set_number = data$true_set_number,
total_set_number = data$total_set_number,
single_set_number = length(rds_file$csets),
max_causal = max(rds_file$true_trait %>% unlist() %>% table()),
file = rds_file$file,
out_file = file_path)
return(table)
}
combined_table <- map_dfr(filenames, read_and_extract_rds)
saveRDS(combined_table, ${_output:r})
Summary Bash commands#
ColocBoost summary#
## 2 trait summary
data_dir="/home/hs3393/cb_Mar/simulation_result/real_simulation_2trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4.Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
## 5 trait summary
data_dir="/home/hs3393/cb_Mar/simulation_result/real_simulation_5trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4.Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
## 10 trait summary
data_dir="/home/hs3393/cb_Mar/simulation_result/real_simulation_10trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4.Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
## 20 trait summary
data_dir="/home/hs3393/cb_Mar/simulation_result/real_simulation_20trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4.Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
Hyprcoloc summary#
data_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_2trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4.Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
data_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_5trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4.Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
data_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4.Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
data_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_20trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 10:00:00
#SBATCH --mem=30000
#SBATCH -J summary
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4.Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
MOLOC summary#
# moloc
data_dir="/home/hs3393/cb_Mar/simulation_result/moloc/moloc_real_simulation_2trait/"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 8:00:00
#SBATCH --mem=30000
#SBATCH -J sum
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4.Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
SuSiE coloc summary#
data_dir="/home/hs3393/cb_Mar/simulation_result/susie_coloc/susie_coloc_real_simulation_2trait"
mkdir -p ${data_dir}/summary
cd ${data_dir}/summary
cat << 'EOF' > summary_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 8:00:00
#SBATCH --mem=30000
#SBATCH -J sum
#SBATCH -o DATA_DIR/log/summary."%j".out
#SBATCH -e DATA_DIR/log/summary."%j".err
source ~/mamba_activate.sh
sos run /home/hs3393/cb_Mar/simulation_code/4.Result_Summary.ipynb coloc_summary \
--folder DATA_DIR/result \
--cwd DATA_DIR/summary
EOF
base_script="summary_script"
output_script="summary.sh"
cat ${base_script}| sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}