Figure 2b. Representative simulation examples.#

Representative simulation examples illustrating limitations of competing methods: (i) methods under “one-causal” assumption fail to detect causal signals with heterogeneous effects and (ii) spurious detection of non-causal variants with strongest marginal effect; (iii) COLOC (V5) shows reduced sensitivity to weak causal effects in GWAS.

1. Heterogeneous effects for two causal variants#

When analyzing multiple causal variants, it’s important to consider that they may exhibit heterogeneous effects across different contexts, populations, or individuals.

ColocBoost identify two CoS including two true causal variants.#

library(tidyverse)
library(colocboost)
data(Heterogeneous_Effect)
data <- Heterogeneous_Effect
X <- data$X
Y <- data$Y
data$variant
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
 dplyr     1.1.4      readr     2.1.5
 forcats   1.0.0      stringr   1.5.1
 ggplot2   3.5.1      tibble    3.2.1
 lubridate 1.9.4      tidyr     1.3.1
 purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
 dplyr::filter() masks stats::filter()
 dplyr::lag()    masks stats::lag()
 Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
  1. 622
  2. 1275
res <- colocboost(X = X, Y = Y)
res$cos_details$cos$cos_index
Starting checking the input data.

Starting gradient boosting algorithm.

Boosting iterations for outcome 1 converge after 51 iterations!

Boosting iterations for outcome 2 converge after 56 iterations!

Starting assemble analyses and results summary.
$`cos1:y1_y2`
622
$`cos2:y1_y2`
  1. 1293
  2. 1270
  3. 1288
  4. 1287
  5. 1305
  6. 1300
  7. 1275
  8. 1271
  9. 1273
  10. 1290
  11. 1291
options(repr.plot.width = 10, repr.plot.height = 6)
colocboost_plot(res, plot_cols = 1, add_vertical = T, add_vertical_idx = c(data$variant))
colocboost_plot(res, plot_cols = 1, add_vertical = T, add_vertical_idx = c(data$variant), grange = c(0:2000))
res$cos_details$cos_purity$min_abs_cor
A matrix: 2 × 2 of type dbl
cos1:y1_y2cos2:y1_y2
cos1:y1_y20.98690880.0125171
cos2:y1_y20.01251710.9840258
colocboost_plot(res, y = "vcp", plot_cols = 1, add_vertical = T, add_vertical_idx = c(data$variant), 
                grange = c(0:2000))

HyPrColoc#

library(hyprcoloc)
betas = sebetas = matrix(NA, nrow = ncol(X), ncol = 2) 
for (i in 1:2){
    x <- scale(X)
    y <- scale(Y[,i])
    rr <- susieR::univariate_regression(x, y)
    betas[,i] = rr$betahat
    sebetas[, i] <- rr$sebetahat
}
traits <- c(1:2)
rsid <- paste0("snp", c(1:ncol(X)))
colnames(betas) <- colnames(sebetas) <- traits
rownames(betas) <- rownames(sebetas) <- rsid
res_hyprcoloc <- hyprcoloc(betas, sebetas, trait.names=traits, snp.id=rsid, snpscores = TRUE)
Warning message:
“replacing previous import ‘Matrix::tcrossprod’ by ‘Rmpfr::tcrossprod’ when loading ‘hyprcoloc’”
Warning message:
“replacing previous import ‘Matrix::crossprod’ by ‘Rmpfr::crossprod’ when loading ‘hyprcoloc’”
Warning message:
“replacing previous import ‘Matrix::.__C__atomicVector’ by ‘Rmpfr::.__C__atomicVector’ when loading ‘hyprcoloc’”
res_hyprcoloc$results
A data.frame: 1 × 7
iterationtraitsposterior_probregional_probcandidate_snpposterior_explained_by_snpdropped_trait
<dbl><chr><lgl><dbl><lgl><dbl><chr>
1NoneNA1NANA1
res_hyprcoloc %>% str
List of 1
 $ results:'data.frame':	1 obs. of  7 variables:
  ..$ iteration                 : num 1
  ..$ traits                    : chr "None"
  ..$ posterior_prob            : logi NA
  ..$ regional_prob             : num 1
  ..$ candidate_snp             : logi NA
  ..$ posterior_explained_by_snp: num NA
  ..$ dropped_trait             : chr "1"
 - attr(*, "class")= chr "hyprcoloc"

COLOC (one causal assumption)#

library(coloc)
LD <- get_cormat(X)
colnames(LD) <- rownames(LD) <- 1:ncol(X)
MAF <- colMeans(X)/2 %>% as.numeric
D1 <- list("beta" = betas[,1], "varbeta" = sebetas[,1]^2,
           "N" = nrow(X), "sdY" = sd(unlist(data$Y[,1])),
           "type" = 'quant', "MAF" = MAF, "LD" = LD,
           "snp" = 1:ncol(X), "position" = 1:ncol(X))
D2 <- list("beta" = betas[,2], "varbeta" = sebetas[,2]^2,
           "N" = nrow(X), "sdY" = sd(unlist(data$Y[,2])),
           "type" = 'quant', "MAF" = MAF, "LD" = LD,
           "snp" = 1:ncol(X), "position" = 1:ncol(X))
my.res <- coloc.abf(dataset1=D1, dataset2=D2)
This is coloc version 5.2.3

Warning message in adjust_prior(p1, nrow(df1), "1"):
“p1 * nsnps >= 1, setting p1=1/(nsnps + 1)”
Warning message in adjust_prior(p2, nrow(df2), "2"):
“p2 * nsnps >= 1, setting p2=1/(nsnps + 1)”
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 1.76e-21  4.33e-09  4.04e-13  9.93e-01  6.84e-03 
[1] "PP abf for shared variant: 0.684%"
plot(my.res$results$SNP.PP.H4[c(1500:4000)])

COLOC (V5)#

library(susieR)
out_p <- runsusie(D1)
out_e <- runsusie(D2)
out_coloc = coloc.susie(out_e, out_p)
running max iterations: 100

	converged: TRUE

running max iterations: 100

	converged: TRUE
out_coloc$summary
A data.table: 4 × 10
nsnpshit1hit2PP.H0.abfPP.H1.abfPP.H2.abfPP.H3.abfPP.H4.abfidx1idx2
<int><chr><chr><dbl><dbl><dbl><dbl><dbl><int><int>
12900349522564.890181e-211.140795e-124.286644e-090.9999999962.924359e-1111
12900225622569.904029e-194.439301e-158.681693e-070.0018952039.981039e-0121
12900349534081.921831e-174.483300e-095.255216e-100.1208367728.791632e-0112
12900225634088.156886e-123.656176e-082.230487e-040.9997750961.818970e-0622
o <- order(out_coloc$results$SNP.PP.H4.row2,decreasing=TRUE)
cs <- cumsum(out_coloc$results$SNP.PP.H4.row2[o])
w <- which(cs > 0.95)[1]
cos1 <- out_coloc$results[o,][1:w,]$snp
cos1
'2256'
o <- order(out_coloc$results$SNP.PP.H4.row3,decreasing=TRUE)
cs <- cumsum(out_coloc$results$SNP.PP.H4.row3[o])
w <- which(cs > 0.95)[1]
cos2 <- out_coloc$results[o,][1:w,]$snp
cos2
  1. '3425'
  2. '3426'
  3. '3457'
  4. '3521'
  5. '3419'
  6. '3480'
  7. '3453'
  8. '3437'
  9. '3495'
  10. '3416'
  11. '3484'
  12. '3523'
  13. '3410'
  14. '3411'
  15. '3414'
  16. '3415'
  17. '3417'
  18. '3420'
  19. '3421'
  20. '3422'
  21. '3423'
  22. '3424'
  23. '3432'
  24. '3435'
  25. '3439'
  26. '3440'
  27. '3449'
  28. '3450'
  29. '3451'
  30. '3452'
  31. '3454'
  32. '3455'
  33. '3456'
  34. '3458'
  35. '3472'
  36. '3476'
  37. '3479'
  38. '3481'
  39. '3482'
  40. '3485'
  41. '3486'
  42. '3504'
  43. '3508'
  44. '3510'
  45. '3511'
  46. '3512'
  47. '3516'
  48. '3447'
  49. '3446'
  50. '3494'
  51. '3428'
  52. '3438'
vcp_coloc <- 1 - (1-out_coloc$results$SNP.PP.H4.row1)*(1-out_coloc$results$SNP.PP.H4.row4)
plot(vcp_coloc[c(1500:4000)])

MOLOC#

library(moloc)
moloc_input <- lapply(1:2, function(cnt){
    tibble(SNP = 1:ncol(data$X), BETA = betas[,cnt],
           SE = sebetas[,cnt], N = nrow(data$X), MAF = MAF)
})
res_moloc <- moloc_test(listData = moloc_input)
Use default priors: 1e-04,1e-05

Mean estimated sdY from data1: 0.560918402515816

Mean estimated sdY from data2: 0.561003724043599

Use prior variances of 0.01 0.1 0.5

Use prior variances of 0.01 0.1 0.5

Best SNP per trait a: 2256

Probability that trait a colocalizes with at least one other trait = 0.0067

Best SNP per trait b: 2256

Probability that trait b colocalizes with at least one other trait = 0.0067

Best SNP per trait ab: 2256

Probability that trait ab colocalizes with at least one other trait = 0.0067
res_moloc$priors_lkl_ppa
A data.frame: 5 × 4
priorsumbflogBF_locusPPA
<dbl><dbl><dbl><dbl>
a1.00e-0437.5606128.095634.648440e-09
a,b1.00e-0865.9509947.021039.933272e-01
b1.00e-0428.3903918.925404.838694e-13
ab1.00e-0554.0402244.575246.672820e-03
zero9.99e-01 0.00000 0.000002.264332e-21
res_moloc$best_snp
A data.frame: 3 × 2
coloc_ppasbest.snp.coloc
<dbl><chr>
a0.006672822256
b0.006672822256
ab0.006672822256

2. Non-causal strongest marginal effect#

library(colocboost)
library(tidyverse)
data(Non_Causal_Strongest_Marginal)
data <- Non_Causal_Strongest_Marginal
X <- data$X
Y <- data$Y
data$variant
  1. 654
  2. 858
res <- colocboost(X = X, Y = Y)
res$cos_details$cos$cos_index
Starting checking the input data.

Starting gradient boosting algorithm.

Boosting iterations for outcome 1 converge after 50 iterations!

Boosting iterations for outcome 2 converge after 56 iterations!

Starting assemble analyses and results summary.
$`cos1:y1_y2`
  1. 654
  2. 721
  3. 761
  4. 797
  5. 673
  6. 677
  7. 756
  8. 720
  9. 740
  10. 678
  11. 773
  12. 777
$`cos2:y1_y2`
  1. 858
  2. 865
  3. 851
  4. 886
  5. 876
  6. 854
  7. 885
options(repr.plot.width = 10, repr.plot.height = 6)
colocboost_plot(res, plot_cols = 1, add_vertical = T, add_vertical_idx = c(data$variant))
colocboost_plot(res, y = "vcp", plot_cols = 1, add_vertical = T, add_vertical_idx = c(data$variant))
res$cos_details$cos_purity$min_abs_cor
A matrix: 2 × 2 of type dbl
cos1:y1_y2cos2:y1_y2
cos1:y1_y20.86666750.2562119
cos2:y1_y20.25621190.7613753

HyPrColoc#

library(hyprcoloc)
betas = sebetas = matrix(NA, nrow = ncol(X), ncol = 2) 
for (i in 1:2){
    x <- scale(X)
    y <- scale(Y[,i])
    rr <- susieR::univariate_regression(x, y)
    betas[,i] = rr$betahat
    sebetas[, i] <- rr$sebetahat
}
traits <- c(1:2)
rsid <- paste0("snp", c(1:ncol(X)))
colnames(betas) <- colnames(sebetas) <- traits
rownames(betas) <- rownames(sebetas) <- rsid
res_hyprcoloc <- hyprcoloc(betas, sebetas, trait.names=traits, snp.id=rsid, snpscores = TRUE)
res_hyprcoloc$results
A data.frame: 1 × 7
iterationtraitsposterior_probregional_probcandidate_snpposterior_explained_by_snpdropped_trait
<dbl><chr><dbl><dbl><chr><dbl><lgl>
11, 20.99951snp7211NA
plot(res_hyprcoloc$snpscores[[1]])

COLOC (one causal assumption)#

library(coloc)
LD <- get_cormat(data$X)
colnames(LD) <- rownames(LD) <- 1:ncol(data$X)
MAF <- colMeans(data$X)/2 %>% as.numeric
D1 <- list("beta" = betas[,1], "varbeta" = sebetas[,1]^2,
           "N" = nrow(data$X), "sdY" = sd(unlist(data$Y[,1])),
           "type" = 'quant', "MAF" = MAF, "LD" = LD,
           "snp" = 1:ncol(data$X), "position" = 1:ncol(data$X))
D2 <- list("beta" = betas[,2], "varbeta" = sebetas[,2]^2,
           "N" = nrow(data$X), "sdY" = sd(unlist(data$Y[,2])),
           "type" = 'quant', "MAF" = MAF, "LD" = LD,
           "snp" = 1:ncol(data$X), "position" = 1:ncol(data$X))
my.res <- coloc.abf(dataset1=D1, dataset2=D2)
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 6.41e-51  2.21e-28  3.44e-26  1.86e-04  1.00e+00 
[1] "PP abf for shared variant: 100%"
o <- order(my.res$results$SNP.PP.H4,decreasing=TRUE)
cs <- cumsum(my.res$results$SNP.PP.H4[o])
w <- which(cs > 0.95)[1]
my.res$results[o,][1:w,]$snp
'721'
pph4.snp <- my.res$results$SNP.PP.H4[order(my.res$results$position,decreasing=FALSE)]
plot(pph4.snp[c(400:1200)])

COLOC (V5)#

library(susieR)
out_p <- runsusie(D1)
out_e <- runsusie(D2)
out_coloc = coloc.susie(out_e, out_p)
running max iterations: 100

	converged: TRUE

running max iterations: 100

	converged: TRUE
out_coloc$summary
o <- order(out_coloc$results$SNP.PP.H4.row2,decreasing=TRUE)
cs <- cumsum(out_coloc$results$SNP.PP.H4.row2[o])
w <- which(cs > 0.95)[1]
cos1 <- out_coloc$results[o,][1:w,]$snp
cos1
  1. '654'
  2. '721'
  3. '761'
  4. '673'
  5. '677'
  6. '797'
o <- order(out_coloc$results$SNP.PP.H4.row3,decreasing=TRUE)
cs <- cumsum(out_coloc$results$SNP.PP.H4.row3[o])
w <- which(cs > 0.95)[1]
cos2 <- out_coloc$results[o,][1:w,]$snp
cos2
'858'
vcp_coloc <- 1 - (1-out_coloc$results$SNP.PP.H4.row2)*(1-out_coloc$results$SNP.PP.H4.row3)
plot(vcp_coloc[c(400:1200)])

MOLOC#

library(moloc)
moloc_input <- lapply(1:2, function(cnt){
    tibble(SNP = 1:ncol(data$X), BETA = betas[,cnt],
           SE = sebetas[,cnt], N = nrow(data$X), MAF = MAF)
})
res_moloc <- moloc_test(listData = moloc_input)
Use default priors: 1e-04,1e-05

Mean estimated sdY from data1: 0.553434902059988

Mean estimated sdY from data2: 0.553187001637554

Use prior variances of 0.01 0.1 0.5

Use prior variances of 0.01 0.1 0.5

Best SNP per trait a: 721

Probability that trait a colocalizes with at least one other trait = 1

Best SNP per trait b: 721

Probability that trait b colocalizes with at least one other trait = 1

Best SNP per trait ab: 721

Probability that trait ab colocalizes with at least one other trait = 1
res_moloc$priors_lkl_ppa
A data.frame: 5 × 4
priorsumbflogBF_locusPPA
<dbl><dbl><dbl><dbl>
a1.00e-04 60.59171 52.907842.492328e-28
a,b1.00e-08125.07877109.711042.529180e-04
b1.00e-04 65.56983 57.885973.618911e-26
ab1.00e-05126.45320118.769349.997471e-01
zero9.99e-01 0.00000 0.000001.207708e-50
res_moloc$best_snp
A data.frame: 3 × 2
coloc_ppasbest.snp.coloc
<dbl><chr>
a0.9997471721
b0.9997471721
ab0.9997471721

3. Weaker causal effect for target traits#

library(colocboost)
library(tidyverse)
data(Weaker_GWAS_Effect)
data <- Weaker_GWAS_Effect
X <- data$X
Y <- data$Y
data$variant
  1. 1926
  2. 2271
res <- colocboost(X = X, Y = Y)
res$cos_details$cos$cos_index
Starting checking the input data.

Starting gradient boosting algorithm.

Boosting iterations for outcome 1 converge after 23 iterations!

Boosting iterations for outcome 2 converge after 33 iterations!

Starting assemble analyses and results summary.
$`cos1:y1_y2`
1926
$`cos2:y1_y2`
2271
options(repr.plot.width = 10, repr.plot.height = 6)
colocboost_plot(res, plot_cols = 1, add_vertical = T, add_vertical_idx = c(data$variant))
colocboost_plot(res, y = "vcp", plot_cols = 1, add_vertical = T, add_vertical_idx = c(data$variant))

HyPrColoc#

library(hyprcoloc)
betas = sebetas = matrix(NA, nrow = ncol(X), ncol = 2) 
for (i in 1:2){
    x <- scale(X)
    y <- scale(Y[,i])
    rr <- susieR::univariate_regression(x, y)
    betas[,i] = rr$betahat
    sebetas[, i] <- rr$sebetahat
}
traits <- c(1:2)
rsid <- paste0("snp", c(1:ncol(X)))
colnames(betas) <- colnames(sebetas) <- traits
rownames(betas) <- rownames(sebetas) <- rsid
res_hyprcoloc <- hyprcoloc(betas, sebetas, trait.names=traits, snp.id=rsid, snpscores = TRUE)
res_hyprcoloc$results
A data.frame: 1 × 7
iterationtraitsposterior_probregional_probcandidate_snpposterior_explained_by_snpdropped_trait
<dbl><chr><dbl><dbl><chr><dbl><lgl>
11, 20.96161snp19260.9823NA
plot(res_hyprcoloc$snpscores[[1]])

COLOC (one causal assumption)#

library(coloc)
LD <- get_cormat(data$X)
colnames(LD) <- rownames(LD) <- 1:ncol(data$X)
MAF <- colMeans(data$X)/2 %>% as.numeric
D1 <- list("beta" = betas[,1], "varbeta" = sebetas[,1]^2,
           "N" = nrow(data$X), "sdY" = sd(unlist(data$Y[,1])),
           "type" = 'quant', "MAF" = MAF, "LD" = LD,
           "snp" = 1:ncol(data$X), "position" = 1:ncol(data$X))
D2 <- list("beta" = betas[,2], "varbeta" = sebetas[,2]^2,
           "N" = nrow(data$X), "sdY" = sd(unlist(data$Y[,2])),
           "type" = 'quant', "MAF" = MAF, "LD" = LD,
           "snp" = 1:ncol(data$X), "position" = 1:ncol(data$X))
my.res <- coloc.abf(dataset1=D1, dataset2=D2)
PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 3.94e-13  2.65e-09  2.69e-06  1.71e-02  9.83e-01 
[1] "PP abf for shared variant: 98.3%"
o <- order(my.res$results$SNP.PP.H4,decreasing=TRUE)
cs <- cumsum(my.res$results$SNP.PP.H4[o])
w <- which(cs > 0.95)[1]
my.res$results[o,][1:w,]$snp
'1926'
pph4.snp <- my.res$results$SNP.PP.H4[order(my.res$results$position,decreasing=FALSE)]
plot(pph4.snp)

COLOC (V5)#

library(susieR)
out_p <- runsusie(D1)
out_e <- runsusie(D2)
out_coloc = coloc.susie(out_e, out_p)
running max iterations: 100

	converged: TRUE

running max iterations: 100

	converged: TRUE
out_coloc$summary
o <- order(out_coloc$results$SNP.PP.H4.row2,decreasing=TRUE)
cs <- cumsum(out_coloc$results$SNP.PP.H4.row2[o])
w <- which(cs > 0.95)[1]
cos1 <- out_coloc$results[o,][1:w,]$snp
cos1
'1926'
vcp_coloc <- out_coloc$results$SNP.PP.H4.row2
plot(vcp_coloc)

MOLOC#

library(moloc)
moloc_input <- lapply(1:2, function(cnt){
    tibble(SNP = 1:ncol(data$X), BETA = betas[,cnt],
           SE = sebetas[,cnt], N = nrow(data$X), MAF = MAF)
})
res_moloc <- moloc_test(listData = moloc_input)
Use default priors: 1e-04,1e-05

Mean estimated sdY from data1: 0.537763795330779

Mean estimated sdY from data2: 0.537609105700604

Use prior variances of 0.01 0.1 0.5

Use prior variances of 0.01 0.1 0.5

Best SNP per trait a: 1926

Probability that trait a colocalizes with at least one other trait = 0.99

Best SNP per trait b: 1926

Probability that trait b colocalizes with at least one other trait = 0.99

Best SNP per trait ab: 1926

Probability that trait ab colocalizes with at least one other trait = 0.99
res_moloc$priors_lkl_ppa
A data.frame: 5 × 4
priorsumbflogBF_locusPPA
<dbl><dbl><dbl><dbl>
a1.00e-0417.99025 9.4562041.984014e-09
a,b1.00e-0842.7190725.6509741.089268e-02
b1.00e-0424.8157416.2816861.827317e-06
ab1.00e-0540.3200331.7859789.891055e-01
zero9.99e-01 0.00000 0.0000003.051242e-13
res_moloc$best_snp
A data.frame: 3 × 2
coloc_ppasbest.snp.coloc
<dbl><chr>
a0.98910551926
b0.98910551926
ab0.98910551926