Skip to contents

Compute regional principal components for methylation data

Usage

compute_regional_pcs(
  meth,
  region_map,
  pc_method = c("gd", "mp"),
  verbose = FALSE
)

Arguments

meth

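Data frame or matrix of methylation values (e.g. beta values), with CpGs in rows and samples in columns. Row names should be the CpG identifiers referenced in region_map, as in the example below.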

region_map

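Data frame mapping CpGs to gene regions, one row per CpG-region pair. The example below uses a region_id column for the region identifier and a cpg_id column for the CpG identifier.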

pc_method

Method used to estimate how many principal components to retain for each region (reported as est_dim in the output), either 'gd' (Gavish-Donoho) or 'mp' (Marchenko-Pastur)

verbose

Logical, should progress messages be displayed?

Value

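A list with four elements: regional_pcs (the regional PC scores, with one row per region-PC such as "Gene1-PC1" and one column per sample), percent_variance (the variance explained by each regional PC), loadings (a list with one matrix of CpG loadings per region), and info (a per-region summary giving the region name, the estimated number of PCs est_dim, and the number of CpGs num_cpgs)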

Examples

# Simulate a 100 x 10 matrix of methylation values: one row per CpG and one
# column per sample, labelled through dimnames in a single step.
meth_data <- matrix(
  rnorm(1000),
  nrow = 100,
  ncol = 10,
  dimnames = list(
    paste0("CpG", seq_len(100)),
    paste0("Sample", seq_len(10))
  )
)

# Assign the first 50 CpGs to Gene1 and the remaining 50 to Gene2.
region_map_data <- data.frame(
  region_id = rep(paste0("Gene", seq_len(2)), each = 50),
  cpg_id = rownames(meth_data)
)

# Summarise each region with the Gavish-Donoho method.
compute_regional_pcs(
  meth = meth_data,
  region_map = region_map_data,
  pc_method = "gd"
)
#> Using Gavish-Donoho method
#> $regional_pcs
#>             Sample1    Sample2   Sample3   Sample4   Sample5   Sample6  Sample7
#> Gene1-PC1 -2.580717 -3.2693801  2.326065  1.447216 -4.787505  2.895122 3.404681
#> Gene2-PC1 -2.977420  0.5961535 -4.432179 -1.392115  5.880181 -1.971666 1.270327
#>            Sample8   Sample9  Sample10
#> Gene1-PC1 4.597906 -1.285160 -2.748228
#> Gene2-PC1 3.954187  2.247911 -3.175380
#> 
#> $percent_variance
#>           percent_variance_explained
#> Gene1-PC1                  0.2147648
#> Gene2-PC1                  0.2226784
#> 
#> $loadings
#> $loadings$Gene1
#>                PC1
#> CpG1   0.136356580
#> CpG2  -0.012124953
#> CpG3   0.014888727
#> CpG4   0.246907998
#> CpG5   0.134151824
#> CpG6   0.163050030
#> CpG7  -0.169395251
#> CpG8  -0.137613211
#> CpG9  -0.091300327
#> CpG10 -0.141907980
#> CpG11  0.054908262
#> CpG12  0.004991858
#> CpG13 -0.052716903
#> CpG14  0.220929376
#> CpG15 -0.129149616
#> CpG16 -0.093748108
#> CpG17  0.077864499
#> CpG18 -0.114104634
#> CpG19  0.050415035
#> CpG20  0.218468507
#> CpG21  0.265436760
#> CpG22  0.069873367
#> CpG23  0.104974102
#> CpG24 -0.044154526
#> CpG25  0.072656969
#> CpG26  0.012565498
#> CpG27  0.161565265
#> CpG28  0.154100403
#> CpG29 -0.152848894
#> CpG30  0.026775014
#> CpG31  0.146802605
#> CpG32  0.060946637
#> CpG33 -0.046413766
#> CpG34  0.081218430
#> CpG35 -0.028868971
#> CpG36  0.089020570
#> CpG37 -0.007499191
#> CpG38 -0.249480086
#> CpG39  0.367478488
#> CpG40  0.071030352
#> CpG41 -0.005430531
#> CpG42  0.034216259
#> CpG43 -0.181075108
#> CpG44 -0.044398006
#> CpG45 -0.121026734
#> CpG46  0.059029971
#> CpG47 -0.176992989
#> CpG48 -0.173951036
#> CpG49  0.175717781
#> CpG50  0.300917721
#> 
#> $loadings$Gene2
#>                 PC1
#> CpG51  -0.066303061
#> CpG52  -0.212117899
#> CpG53  -0.038541483
#> CpG54  -0.293210594
#> CpG55   0.313687042
#> CpG56   0.040318222
#> CpG57   0.091889040
#> CpG58   0.010822202
#> CpG59  -0.074140783
#> CpG60  -0.070664976
#> CpG61  -0.094225492
#> CpG62  -0.215506275
#> CpG63   0.019726454
#> CpG64  -0.330429094
#> CpG65   0.007675132
#> CpG66  -0.167160767
#> CpG67  -0.020794257
#> CpG68   0.017044538
#> CpG69  -0.006596880
#> CpG70  -0.119377349
#> CpG71  -0.149644946
#> CpG72  -0.093335895
#> CpG73   0.037982610
#> CpG74  -0.015700945
#> CpG75  -0.031811171
#> CpG76  -0.118294659
#> CpG77  -0.076810221
#> CpG78  -0.063127892
#> CpG79  -0.022781771
#> CpG80   0.072786470
#> CpG81   0.041850966
#> CpG82  -0.032150612
#> CpG83   0.081351100
#> CpG84   0.019774832
#> CpG85   0.012419445
#> CpG86   0.159022326
#> CpG87  -0.178428197
#> CpG88   0.436757275
#> CpG89  -0.070221497
#> CpG90   0.061088279
#> CpG91  -0.017644281
#> CpG92   0.040797945
#> CpG93   0.216858000
#> CpG94  -0.056274842
#> CpG95  -0.059216233
#> CpG96  -0.047034356
#> CpG97  -0.086866750
#> CpG98   0.254061800
#> CpG99   0.073363060
#> CpG100  0.266090041
#> 
#> 
#> $info
#>      region  est_dim num_cpgs
#> [1,] "Gene1" 1       50      
#> [2,] "Gene2" 1       50      
#>