Other colocalization methods#
Goal#
This notebook takes the input and run Hyprcoloc, MOLOC or COLOC(V5) (SuSiE-Coloc) , the result can be summarized to get power and FDR.
Because different methods have different output, we summarized four elements to calculate FDR and power - coloc_trait, coloc_set true_trait and true_variant.
Input#
Individual level data X and Y or summary statistics z and LD from other notebooks. put in the parameter: simufile.
Output#
Hyprcoloc / MOLOC / COLOC (V5) original result, along with some summarized elements.
Example output:
result = readRDS("/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_9_real_simulation_3_ncausal_10_trait_ntr_10_hypercoloc.rds")
result$results
iteration | traits | posterior_prob | regional_prob | candidate_snp | posterior_explained_by_snp | dropped_trait | |
---|---|---|---|---|---|---|---|
<dbl> | <chr> | <dbl> | <dbl> | <chr> | <dbl> | <chr> | |
1 | 1 | 5, 9 | 0.8754 | 1 | snp917 | 0.3881 | NA |
2 | 2 | 3, 8 | 0.7915 | 1 | snp2568 | 0.0418 | NA |
3 | 3 | 2, 6 | 0.9426 | 1 | snp2904 | 0.1141 | NA |
print("=====Prediction=====")
# colocalization result - CoS
result$coloc_set
# colocalization result - colocalizing trait
result$coloc_trait
print("=====Truth=====")
# true colocalizing CoS
result$true_variant
# true colocalizing trait
result$true_trait
[1] "=====Prediction====="
-
- 917
- 911
- 931
- 934
- 916
- 913
-
- 2568
- 2479
- 2605
- 2627
- 2535
- 2483
- 2485
- 2487
- 2495
- 2496
- 2497
- 2498
- 2504
- 2507
- 2510
- 2512
- 2513
- 2514
- 2515
- 2518
- 2526
- 2528
- 2529
- 2530
- 2537
- 2544
- 2545
- 2547
- 2548
- 2550
- 2552
- 2554
- 2555
- 2558
- 2560
- 2562
- 2563
- 2582
- 2590
- 2591
- 2592
- 2600
- 2603
- 2604
- 2610
- 2617
- 2618
- 2620
- 2621
- 2625
- 2630
- 2631
- 2632
- 2634
- 2635
- 2636
- 2639
- 2642
- 2644
- 2646
- 2647
- 2648
- 2650
- 2652
- 2655
- 2656
- 2657
- 2658
- 2659
- 2665
- 2667
- 2668
- 2673
- 2594
- 2539
- 2614
- 2564
- 2615
- 2649
- 2531
- 2533
- 2493
- 2576
-
- 2904
- 2892
- 2900
- 2908
- 2910
- 2911
- 2920
- 2874
- 2887
- 2859
- 2888
- 2894
-
- 5
- 9
-
- 3
- 8
-
- 2
- 6
[1] "=====Truth====="
- 934
- 2485
- 2908
-
- 5
- 6
- 9
-
- 3
- 8
-
- 2
- 6
Run HyprColoc without LD#
[hyprcoloc_set]
parameter: simufile = paths
parameter: cwd = path("output")
parameter: job_size = 30
parameter: walltime = "50h"
parameter: mem = "30G"
parameter: numThreads = 1
parameter: trait = 10
parameter: container = ""
parameter: setting="normal"
input: simufile, group_by = 1
output: f'{cwd:a}/{_input[0]:bn}_ntr_{trait}_hyprcoloc.rds'
task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'
R: expand = '${ }', stdout = f"{_output:n}.stdout", stderr = f"{_output:n}.stderr", container = container
library(hyprcoloc)
source("/home/hs3393/cb_simulation/simulation_code/hypercoloc_set.r")
file = ${_input:ar,}
hypercoloc_result = run_hypercoloc(file, ${trait}, setting="${setting}")
saveRDS(hypercoloc_result, ${_output:r})
Run Hyprcoloc with LD info#
[hyprcoloc_LD_set]
parameter: simufile = paths
parameter: cwd = path("output")
parameter: job_size = 30
parameter: walltime = "50h"
parameter: mem = "30G"
parameter: numThreads = 1
parameter: trait = 10
parameter: container = ""
parameter: setting="normal"
input: simufile, group_by = 1
output: f'{cwd:a}/{_input[0]:bn}_ntr_{trait}_hypercoloc_LD.rds'
task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'
R: expand = '${ }', stdout = f"{_output:n}.stdout", stderr = f"{_output:n}.stderr", container = container
library(hyprcoloc)
source("/home/hs3393/cb_simulation/simulation_code/hypercoloc_set_LD.r")
file = ${_input:ar,}
hypercoloc_result = run_hypercoloc(file, ${trait}, setting="${setting}")
saveRDS(hypercoloc_result, ${_output:r})
Run MOLOC#
[moloc_set]
parameter: simufile = paths
parameter: cwd = path("output")
parameter: job_size = 30
parameter: walltime = "50h"
parameter: mem = "40G"
parameter: numThreads = 1
parameter: trait = 50
parameter: container = ""
input: simufile, group_by = 1
output: f'{cwd:a}/{_input[0]:bn}_ntr_{trait}_moloc_set.rds'
task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'
R: expand = '${ }', stdout = f"{_output:n}.stdout", stderr = f"{_output:n}.stderr", container = container
library(moloc)
source("/home/hs3393/cb_simulation/simulation_code/moloc_set.r")
files = c(${_input:ar,})
moloc_result = run_moloc(files, ${trait})
saveRDS(moloc_result, ${_output:r})
Run COLOC V5 (SuSiE coloc)#
[susie_coloc_set]
parameter: simufile = paths
parameter: cwd = path("output")
parameter: job_size = 300
parameter: walltime = "50h"
parameter: mem = "40G"
parameter: numThreads = 1
parameter: trait = 2
parameter: container = ""
input: simufile, group_by = 1
output: f'{cwd:a}/{_input[0]:bn}_ntr_{trait}_susie_coloc_result.rds'
task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'
R: expand = '${ }', stdout = f"{_output:n}.stdout", stderr = f"{_output:n}.stderr", container = container
source("/home/hs3393/cb_simulation/simulation_code/susie_coloc.r")
file = ${_input:ar,}
susie_coloc_result = run_susie_coloc(file)
saveRDS(susie_coloc_result, ${_output:r})
Bash files#
Hyprcoloc#
## 2 trait
data_dir="/home/hs3393/cb_Mar/simulation_data/real_simulation_2trait/"
job="hyp_real_simulation_2trait"
work_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/"
#!/bin/bash
mkdir -p ${work_dir}/${job}/code
mkdir -p ${work_dir}/${job}/log
mkdir -p ${work_dir}/${job}/result
cd ${work_dir}/${job}/code
cat << 'EOF' > base_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 80:00:00
#SBATCH --mem=20000
#SBATCH -J JOB
#SBATCH -o WORK_DIR/JOB/log/JOB.%j.out
#SBATCH -e WORK_DIR/JOB/log/JOB.%j.err
source /home/hs3393/mamba_activate.sh
module load Singularity
cd DATA_DIR
sos run /home/hs3393/cb_Mar/simulation_code/3.Other_Methods.ipynb hyprcoloc_set \
--simufile $(find -type f -name '*_NCAUSAL_ncausal_*.rds') \
--mem 20G --trait 2 \
--cwd WORK_DIR/JOB/result
EOF
base_script="base_script"
for ncausal in 1 2 3; do
output_script="ncausal_${ncausal}.sh"
cat ${base_script}| sed "s|WORK_DIR|${work_dir}|g" |sed "s|NCAUSAL|${ncausal}|g" | sed "s|JOB|${job}|g" | sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
done
## 5 trait
data_dir="/home/hs3393/cb_Mar/simulation_data/real_simulation_5trait/"
job="hyp_real_simulation_5trait"
work_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/"
#!/bin/bash
mkdir -p ${work_dir}/${job}/code
mkdir -p ${work_dir}/${job}/log
mkdir -p ${work_dir}/${job}/result
cd ${work_dir}/${job}/code
cat << 'EOF' > base_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 80:00:00
#SBATCH --mem=20000
#SBATCH -J JOB
#SBATCH -o WORK_DIR/JOB/log/JOB.%j.out
#SBATCH -e WORK_DIR/JOB/log/JOB.%j.err
source /home/hs3393/mamba_activate.sh
module load Singularity
cd DATA_DIR
sos run /home/hs3393/cb_Mar/simulation_code/3.Other_Methods.ipynb hyprcoloc_set \
--simufile $(find -type f -name '*_NCAUSAL_ncausal_*.rds') \
--mem 20G --trait 5 \
--cwd WORK_DIR/JOB/result
EOF
base_script="base_script"
for ncausal in 1 2 3 4; do
output_script="ncausal_${ncausal}.sh"
cat ${base_script}| sed "s|WORK_DIR|${work_dir}|g" |sed "s|NCAUSAL|${ncausal}|g" | sed "s|JOB|${job}|g" | sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
done
## 10 trait
data_dir="/home/hs3393/cb_Mar/simulation_data/real_simulation_10trait/"
job="hyp_real_simulation_10trait"
work_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/"
#!/bin/bash
mkdir -p ${work_dir}/${job}/code
mkdir -p ${work_dir}/${job}/log
mkdir -p ${work_dir}/${job}/result
cd ${work_dir}/${job}/code
cat << 'EOF' > base_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 80:00:00
#SBATCH --mem=30000
#SBATCH -J JOB
#SBATCH -o WORK_DIR/JOB/log/JOB.%j.out
#SBATCH -e WORK_DIR/JOB/log/JOB.%j.err
source /home/hs3393/mamba_activate.sh
module load Singularity
cd DATA_DIR
sos run /home/hs3393/cb_Mar/simulation_code/3.Other_Methods.ipynb hyprcoloc_set \
--simufile $(find -type f -name '*_NCAUSAL_ncausal_*.rds') \
--mem 30G --trait 10 \
--cwd WORK_DIR/JOB/result
EOF
base_script="base_script"
for ncausal in 1 2 3 4; do
output_script="ncausal_${ncausal}.sh"
cat ${base_script}| sed "s|WORK_DIR|${work_dir}|g" |sed "s|NCAUSAL|${ncausal}|g" | sed "s|JOB|${job}|g" | sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
done
## 20 trait
data_dir="/home/hs3393/cb_Mar/simulation_data/real_simulation_20trait/"
job="hyp_real_simulation_20trait"
work_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/"
#!/bin/bash
mkdir -p ${work_dir}/${job}/code
mkdir -p ${work_dir}/${job}/log
mkdir -p ${work_dir}/${job}/result
cd ${work_dir}/${job}/code
cat << 'EOF' > base_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 80:00:00
#SBATCH --mem=40000
#SBATCH -J JOB
#SBATCH -o WORK_DIR/JOB/log/JOB.%j.out
#SBATCH -e WORK_DIR/JOB/log/JOB.%j.err
source /home/hs3393/mamba_activate.sh
module load Singularity
cd DATA_DIR
sos run /home/hs3393/cb_Mar/simulation_code/3.Other_Methods.ipynb hyprcoloc_set \
--simufile $(find -type f -name '*_NCAUSAL_ncausal_*.rds') \
--mem 40G --trait 20 \
--cwd WORK_DIR/JOB/result
EOF
base_script="base_script"
for ncausal in 1 2 3 4; do
output_script="ncausal_${ncausal}.sh"
cat ${base_script}| sed "s|WORK_DIR|${work_dir}|g" |sed "s|NCAUSAL|${ncausal}|g" | sed "s|JOB|${job}|g" | sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
done
MOLOC#
data_dir="/home/hs3393/cb_Mar/simulation_data/real_simulation_2trait/"
job="moloc_real_simulation_2trait"
work_dir="/home/hs3393/cb_Mar/simulation_result/moloc"
#!/bin/bash
mkdir -p ${work_dir}/${job}/code
mkdir -p ${work_dir}/${job}/log
mkdir -p ${work_dir}/${job}/result
cd ${work_dir}/${job}/code
cat << 'EOF' > base_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 80:00:00
#SBATCH --mem=15000
#SBATCH -J JOB
#SBATCH -o WORK_DIR/JOB/log/JOB.%j.out
#SBATCH -e WORK_DIR/JOB/log/JOB.%j.err
source /home/hs3393/mamba_activate.sh
module load Singularity
cd DATA_DIR
sos run /home/hs3393/cb_Mar/simulation_code/3.Other_Methods.ipynb moloc_set \
--simufile $(find -type f -name '*_NCAUSAL_ncausal_*.rds') \
--mem 15G --trait 2 \
--cwd WORK_DIR/JOB/result
EOF
base_script="base_script"
for ncausal in 1 2 3; do
output_script="ncausal_${ncausal}.sh"
cat ${base_script}| sed "s|WORK_DIR|${work_dir}|g" |sed "s|NCAUSAL|${ncausal}|g" | sed "s|JOB|${job}|g" | sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
done
COLOC V5 (SuSiE-COLOC)#
data_dir="/home/hs3393/cb_Mar/simulation_data/real_simulation_2trait/"
job="susie_coloc_real_simulation_2trait"
work_dir="/home/hs3393/cb_Mar/simulation_result/susie_coloc"
#!/bin/bash
mkdir -p ${work_dir}/${job}/code
mkdir -p ${work_dir}/${job}/log
mkdir -p ${work_dir}/${job}/result
cd ${work_dir}/${job}/code
cat << 'EOF' > base_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 80:00:00
#SBATCH --mem=40000
#SBATCH -J JOB
#SBATCH -o WORK_DIR/JOB/log/JOB.%j.out
#SBATCH -e WORK_DIR/JOB/log/JOB.%j.err
source /home/hs3393/mamba_activate.sh
module load Singularity
cd DATA_DIR
sos run /home/hs3393/cb_Mar/simulation_code/3.Other_Methods.ipynb susie_coloc_set \
--simufile $(find -type f -name '*_NCAUSAL_ncausal_*.rds') \
--mem 40G --trait 2 \
--cwd WORK_DIR/JOB/result
EOF
base_script="base_script"
for ncausal in 1 2 3; do
output_script="ncausal_${ncausal}.sh"
cat ${base_script}| sed "s|WORK_DIR|${work_dir}|g" |sed "s|NCAUSAL|${ncausal}|g" | sed "s|JOB|${job}|g" | sed "s|DATA_DIR|${data_dir}|g" > ${output_script}
sbatch ${output_script}
done