Other colocalization methods#

Goal#

This notebook takes the input and run Hyprcoloc, MOLOC or COLOC(V5) (SuSiE-Coloc) , the result can be summarized to get power and FDR.

Because different methods have different output, we summarized four elements to calculate FDR and power - coloc_trait, coloc_set true_trait and true_variant.

Input#

Individual level data X and Y or summary statistics z and LD from other notebooks. put in the parameter: simufile.

Output#

Hyprcoloc / MOLOC / COLOC (V5) original result, along with some summarized elements.

Example output:

result = readRDS("/home/hs3393/cb_Mar/simulation_result/hyprcoloc/hyp_real_simulation_10trait/result/sample_9_real_simulation_3_ncausal_10_trait_ntr_10_hypercoloc.rds")
result$results
A data.frame: 3 × 7
iterationtraitsposterior_probregional_probcandidate_snpposterior_explained_by_snpdropped_trait
<dbl><chr><dbl><dbl><chr><dbl><chr>
115, 90.87541snp917 0.3881NA
223, 80.79151snp25680.0418NA
332, 60.94261snp29040.1141NA
print("=====Prediction=====")
# colocalization result - CoS
result$coloc_set

# colocalization result - colocalizing trait
result$coloc_trait

print("=====Truth=====")
#  true colocalizing CoS
result$true_variant

#  true colocalizing trait
result$true_trait
[1] "=====Prediction====="
    1. 917
    2. 911
    3. 931
    4. 934
    5. 916
    6. 913
    1. 2568
    2. 2479
    3. 2605
    4. 2627
    5. 2535
    6. 2483
    7. 2485
    8. 2487
    9. 2495
    10. 2496
    11. 2497
    12. 2498
    13. 2504
    14. 2507
    15. 2510
    16. 2512
    17. 2513
    18. 2514
    19. 2515
    20. 2518
    21. 2526
    22. 2528
    23. 2529
    24. 2530
    25. 2537
    26. 2544
    27. 2545
    28. 2547
    29. 2548
    30. 2550
    31. 2552
    32. 2554
    33. 2555
    34. 2558
    35. 2560
    36. 2562
    37. 2563
    38. 2582
    39. 2590
    40. 2591
    41. 2592
    42. 2600
    43. 2603
    44. 2604
    45. 2610
    46. 2617
    47. 2618
    48. 2620
    49. 2621
    50. 2625
    51. 2630
    52. 2631
    53. 2632
    54. 2634
    55. 2635
    56. 2636
    57. 2639
    58. 2642
    59. 2644
    60. 2646
    61. 2647
    62. 2648
    63. 2650
    64. 2652
    65. 2655
    66. 2656
    67. 2657
    68. 2658
    69. 2659
    70. 2665
    71. 2667
    72. 2668
    73. 2673
    74. 2594
    75. 2539
    76. 2614
    77. 2564
    78. 2615
    79. 2649
    80. 2531
    81. 2533
    82. 2493
    83. 2576
    1. 2904
    2. 2892
    3. 2900
    4. 2908
    5. 2910
    6. 2911
    7. 2920
    8. 2874
    9. 2887
    10. 2859
    11. 2888
    12. 2894
    1. 5
    2. 9
    1. 3
    2. 8
    1. 2
    2. 6
[1] "=====Truth====="
  1. 934
  2. 2485
  3. 2908
    1. 5
    2. 6
    3. 9
    1. 3
    2. 8
    1. 2
    2. 6

Run HyprColoc without LD#

[hyprcoloc_set]
parameter: simufile = paths
parameter: cwd = path("output")
parameter: job_size = 30
parameter: walltime = "50h"
parameter: mem = "30G"
parameter: numThreads = 1
parameter: trait = 10
parameter: container = ""
parameter: setting="normal"
input: simufile, group_by = 1
output: f'{cwd:a}/{_input[0]:bn}_ntr_{trait}_hyprcoloc.rds'
task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'
R:  expand = '${ }', stdout = f"{_output:n}.stdout", stderr = f"{_output:n}.stderr", container = container 
    library(hyprcoloc)
    source("/home/hs3393/cb_simulation/simulation_code/hypercoloc_set.r")
    file = ${_input:ar,}
    hypercoloc_result = run_hypercoloc(file, ${trait}, setting="${setting}")
    saveRDS(hypercoloc_result, ${_output:r})
  

Run Hyprcoloc with LD info#

[hyprcoloc_LD_set]
parameter: simufile = paths
parameter: cwd = path("output")
parameter: job_size = 30
parameter: walltime = "50h"
parameter: mem = "30G"
parameter: numThreads = 1
parameter: trait = 10
parameter: container = ""
parameter: setting="normal"
input: simufile, group_by = 1
output: f'{cwd:a}/{_input[0]:bn}_ntr_{trait}_hypercoloc_LD.rds'
task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'
R:  expand = '${ }', stdout = f"{_output:n}.stdout", stderr = f"{_output:n}.stderr", container = container 
    library(hyprcoloc)
    source("/home/hs3393/cb_simulation/simulation_code/hypercoloc_set_LD.r")
    file = ${_input:ar,}
    hypercoloc_result = run_hypercoloc(file, ${trait}, setting="${setting}")
    saveRDS(hypercoloc_result, ${_output:r})

Run MOLOC#

[moloc_set]
parameter: simufile = paths
parameter: cwd = path("output")
parameter: job_size = 30
parameter: walltime = "50h"
parameter: mem = "40G"
parameter: numThreads = 1
parameter: trait = 50
parameter: container = ""
input: simufile, group_by = 1
output: f'{cwd:a}/{_input[0]:bn}_ntr_{trait}_moloc_set.rds'
task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'
R:  expand = '${ }', stdout = f"{_output:n}.stdout", stderr = f"{_output:n}.stderr", container = container 
    library(moloc)
    source("/home/hs3393/cb_simulation/simulation_code/moloc_set.r")
    files = c(${_input:ar,})
    moloc_result = run_moloc(files, ${trait})
    saveRDS(moloc_result, ${_output:r})

Run COLOC V5 (SuSiE coloc)#

[susie_coloc_set]
parameter: simufile = paths
parameter: cwd = path("output")
parameter: job_size = 300
parameter: walltime = "50h"
parameter: mem = "40G"
parameter: numThreads = 1
parameter: trait = 2
parameter: container = ""
input: simufile, group_by = 1
output: f'{cwd:a}/{_input[0]:bn}_ntr_{trait}_susie_coloc_result.rds'
task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'
R:  expand = '${ }', stdout = f"{_output:n}.stdout", stderr = f"{_output:n}.stderr", container = container 
    source("/home/hs3393/cb_simulation/simulation_code/susie_coloc.r")
    file = ${_input:ar,}
    susie_coloc_result = run_susie_coloc(file)
    saveRDS(susie_coloc_result, ${_output:r})

Bash files#

Hyprcoloc#

## 2 trait

data_dir="/home/hs3393/cb_Mar/simulation_data/real_simulation_2trait/"
job="hyp_real_simulation_2trait"
work_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/"
#!/bin/bash

mkdir -p ${work_dir}/${job}/code
mkdir -p ${work_dir}/${job}/log
mkdir -p ${work_dir}/${job}/result

cd ${work_dir}/${job}/code

cat << 'EOF' > base_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 80:00:00
#SBATCH --mem=20000
#SBATCH -J JOB
#SBATCH -o WORK_DIR/JOB/log/JOB.%j.out
#SBATCH -e WORK_DIR/JOB/log/JOB.%j.err

source /home/hs3393/mamba_activate.sh
module load Singularity

cd DATA_DIR
sos run /home/hs3393/cb_Mar/simulation_code/3.Other_Methods.ipynb hyprcoloc_set \
    --simufile $(find -type f -name '*_NCAUSAL_ncausal_*.rds') \
    --mem 20G --trait 2 \
    --cwd WORK_DIR/JOB/result
EOF


base_script="base_script"
for ncausal in 1 2 3; do
    output_script="ncausal_${ncausal}.sh"
    cat ${base_script}| sed "s|WORK_DIR|${work_dir}|g" |sed "s|NCAUSAL|${ncausal}|g" | sed "s|JOB|${job}|g" | sed "s|DATA_DIR|${data_dir}|g"  > ${output_script}
    sbatch ${output_script}
done

## 5 trait

data_dir="/home/hs3393/cb_Mar/simulation_data/real_simulation_5trait/"
job="hyp_real_simulation_5trait"
work_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/"
#!/bin/bash

mkdir -p ${work_dir}/${job}/code
mkdir -p ${work_dir}/${job}/log
mkdir -p ${work_dir}/${job}/result

cd ${work_dir}/${job}/code

cat << 'EOF' > base_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 80:00:00
#SBATCH --mem=20000
#SBATCH -J JOB
#SBATCH -o WORK_DIR/JOB/log/JOB.%j.out
#SBATCH -e WORK_DIR/JOB/log/JOB.%j.err

source /home/hs3393/mamba_activate.sh
module load Singularity

cd DATA_DIR
sos run /home/hs3393/cb_Mar/simulation_code/3.Other_Methods.ipynb hyprcoloc_set \
    --simufile $(find -type f -name '*_NCAUSAL_ncausal_*.rds') \
    --mem 20G --trait 5 \
    --cwd WORK_DIR/JOB/result
EOF


base_script="base_script"
for ncausal in 1 2 3 4; do
    output_script="ncausal_${ncausal}.sh"
    cat ${base_script}| sed "s|WORK_DIR|${work_dir}|g" |sed "s|NCAUSAL|${ncausal}|g" | sed "s|JOB|${job}|g" | sed "s|DATA_DIR|${data_dir}|g"  > ${output_script}
    sbatch ${output_script}
done

## 10 trait

data_dir="/home/hs3393/cb_Mar/simulation_data/real_simulation_10trait/"
job="hyp_real_simulation_10trait"
work_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/"
#!/bin/bash

mkdir -p ${work_dir}/${job}/code
mkdir -p ${work_dir}/${job}/log
mkdir -p ${work_dir}/${job}/result

cd ${work_dir}/${job}/code

cat << 'EOF' > base_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 80:00:00
#SBATCH --mem=30000
#SBATCH -J JOB
#SBATCH -o WORK_DIR/JOB/log/JOB.%j.out
#SBATCH -e WORK_DIR/JOB/log/JOB.%j.err

source /home/hs3393/mamba_activate.sh
module load Singularity

cd DATA_DIR
sos run /home/hs3393/cb_Mar/simulation_code/3.Other_Methods.ipynb hyprcoloc_set \
    --simufile $(find -type f -name '*_NCAUSAL_ncausal_*.rds') \
    --mem 30G --trait 10 \
    --cwd WORK_DIR/JOB/result
EOF


base_script="base_script"
for ncausal in 1 2 3 4; do
    output_script="ncausal_${ncausal}.sh"
    cat ${base_script}| sed "s|WORK_DIR|${work_dir}|g" |sed "s|NCAUSAL|${ncausal}|g" | sed "s|JOB|${job}|g" | sed "s|DATA_DIR|${data_dir}|g"  > ${output_script}
    sbatch ${output_script}
done

## 20 trait

data_dir="/home/hs3393/cb_Mar/simulation_data/real_simulation_20trait/"
job="hyp_real_simulation_20trait"
work_dir="/home/hs3393/cb_Mar/simulation_result/hyprcoloc/"
#!/bin/bash

mkdir -p ${work_dir}/${job}/code
mkdir -p ${work_dir}/${job}/log
mkdir -p ${work_dir}/${job}/result

cd ${work_dir}/${job}/code

cat << 'EOF' > base_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 80:00:00
#SBATCH --mem=40000
#SBATCH -J JOB
#SBATCH -o WORK_DIR/JOB/log/JOB.%j.out
#SBATCH -e WORK_DIR/JOB/log/JOB.%j.err

source /home/hs3393/mamba_activate.sh
module load Singularity

cd DATA_DIR
sos run /home/hs3393/cb_Mar/simulation_code/3.Other_Methods.ipynb hyprcoloc_set \
    --simufile $(find -type f -name '*_NCAUSAL_ncausal_*.rds') \
    --mem 40G --trait 20 \
    --cwd WORK_DIR/JOB/result
EOF

base_script="base_script"
for ncausal in 1 2 3 4; do
    output_script="ncausal_${ncausal}.sh"
    cat ${base_script}| sed "s|WORK_DIR|${work_dir}|g" |sed "s|NCAUSAL|${ncausal}|g" | sed "s|JOB|${job}|g" | sed "s|DATA_DIR|${data_dir}|g"  > ${output_script}
    sbatch ${output_script}
done

MOLOC#

data_dir="/home/hs3393/cb_Mar/simulation_data/real_simulation_2trait/"
job="moloc_real_simulation_2trait"
work_dir="/home/hs3393/cb_Mar/simulation_result/moloc"
#!/bin/bash

mkdir -p ${work_dir}/${job}/code
mkdir -p ${work_dir}/${job}/log
mkdir -p ${work_dir}/${job}/result

cd ${work_dir}/${job}/code

cat << 'EOF' > base_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 80:00:00
#SBATCH --mem=15000
#SBATCH -J JOB
#SBATCH -o WORK_DIR/JOB/log/JOB.%j.out
#SBATCH -e WORK_DIR/JOB/log/JOB.%j.err

source /home/hs3393/mamba_activate.sh
module load Singularity

cd DATA_DIR
sos run /home/hs3393/cb_Mar/simulation_code/3.Other_Methods.ipynb moloc_set \
    --simufile $(find -type f -name '*_NCAUSAL_ncausal_*.rds') \
    --mem 15G --trait 2 \
    --cwd WORK_DIR/JOB/result
EOF


base_script="base_script"
for ncausal in 1 2 3; do
    output_script="ncausal_${ncausal}.sh"
    cat ${base_script}| sed "s|WORK_DIR|${work_dir}|g" |sed "s|NCAUSAL|${ncausal}|g" | sed "s|JOB|${job}|g" | sed "s|DATA_DIR|${data_dir}|g"  > ${output_script}
    sbatch ${output_script}
done

COLOC V5 (SuSiE-COLOC)#

data_dir="/home/hs3393/cb_Mar/simulation_data/real_simulation_2trait/"
job="susie_coloc_real_simulation_2trait"
work_dir="/home/hs3393/cb_Mar/simulation_result/susie_coloc"
#!/bin/bash

mkdir -p ${work_dir}/${job}/code
mkdir -p ${work_dir}/${job}/log
mkdir -p ${work_dir}/${job}/result

cd ${work_dir}/${job}/code

cat << 'EOF' > base_script
#!/bin/bash -l
# NOTE the -l flag!
#
#SBATCH -t 80:00:00
#SBATCH --mem=40000
#SBATCH -J JOB
#SBATCH -o WORK_DIR/JOB/log/JOB.%j.out
#SBATCH -e WORK_DIR/JOB/log/JOB.%j.err

source /home/hs3393/mamba_activate.sh
module load Singularity

cd DATA_DIR
sos run /home/hs3393/cb_Mar/simulation_code/3.Other_Methods.ipynb susie_coloc_set \
    --simufile $(find -type f -name '*_NCAUSAL_ncausal_*.rds') \
    --mem 40G --trait 2 \
    --cwd WORK_DIR/JOB/result
EOF


base_script="base_script"
for ncausal in 1 2 3; do
    output_script="ncausal_${ncausal}.sh"
    cat ${base_script}| sed "s|WORK_DIR|${work_dir}|g" |sed "s|NCAUSAL|${ncausal}|g" | sed "s|JOB|${job}|g" | sed "s|DATA_DIR|${data_dir}|g"  > ${output_script}
    sbatch ${output_script}
done