R包
parallel
doParallel
foreach
parallel包
1.鑒定本機的核數
# Load the parallel package
library(parallel)

# Store the number of cores in the object no_of_cores.
# detectCores() is documented to return NA when the count is unknown.
no_of_cores <- detectCores()

# Alias used by the later makeCluster(no_cores) examples in these notes,
# which otherwise refer to an object that is never defined. Falls back to a
# single worker when the core count could not be determined, because
# makeCluster() cannot be given NA.
no_cores <- if (is.na(no_of_cores)) 1L else no_of_cores

# Print no_of_cores
print(no_of_cores)
2.parApply
3.parSapply
可變范圍
在Mac / Linux上,您可以選擇使用自動包含所有環境變量的makeCluster(no_cores, type = "FORK")(以下詳細信息)。 在Windows上,您必須使用并行插座集群(PSOCK),其中僅包含已加載的基本包(請注意,PSOCK在所有系統上都是默認值)。 因此,您應該始終指定并行功能所需的哪些變量和庫,例如以下失敗:
> cl<-makeCluster(4)
> base <- 2
>
> parLapply(cl,
+ 2:4,
+ function(exponent)
+ base^exponent)
Error in checkForRemoteErrors(val) :
3 nodes produced errors; first error: 找不到對象'base'
>
> stopCluster(cl)
> cl<-makeCluster(4)
>
> base <- 2
> clusterExport(cl, "base")
> parLapply(cl,
+ 2:4,
+ function(exponent)
+ base^exponent)
[[1]]
[1] 4
[[2]]
[1] 8
[[3]]
[1] 16
您需要使用clusterExport(cl, "base")才能使該函數看到變量base。 如果您正在使用某些特殊軟件包,那么同樣需要通過clusterEvalQ來加載它們。 我經常使用rms包,因此我使用clusterEvalQ(cl, library(rms))。 請注意,對clusterExport后變量的任何更改都將被忽略:
> cl<-makeCluster(no_cores)
> clusterExport(cl, "base")
> base <- 4
> # Run
> parLapply(cl,
+ 2:4,
+ function(exponent)
+ base^exponent)
[[1]]
[1] 4
[[2]]
[1] 8
[[3]]
[1] 16
>
> # Finish
> stopCluster(cl)
方法一
# Serial baseline: the anonymous function reads y from the calling
# environment, so no extra work is needed.
y <- 1:10
sapply(1:5, function(x) x + y)

library(parallel)
cl <- makeCluster(2)
y <- 1:10

# Option A: add y to the function definition and pass it as an extra
# argument of the parSapply call.
parSapply(cl, 1:5, function(x, y) x + y, y)

# Option B: export y to the global environment of each node,
# then call the original code unchanged.
clusterExport(cl, "y")
parSapply(cl, 1:5, function(x) x + y)

# Shut the workers down once the example is finished; without this the
# cluster stays alive after `cl` is reassigned by a later example.
stopCluster(cl)
方法二
library(parallel)

# Adds y to each of 1:5 on the workers of cluster `cl`.
#   cl: a cluster object as returned by makeCluster()
#   y : value added to every element of 1:5
# y is an argument of fun, so the helper handed to parSapply is created in
# an environment that already contains y; this snippet makes no
# clusterExport() call.
fun <- function(cl, y) {
  add_y <- function(x) x + y
  parSapply(cl, 1:5, add_y)
}

# Start two workers, run the example, then shut the workers down.
cl <- makeCluster(2)
fun(cl, y = 1:10)
stopCluster(cl)
4.mclapply(Windows不能使用)
library(parallel)

# Per-element worker: squares its input.
workerFunc <- function(n) {
  n^2
}

# Inputs to be processed
values <- 1:100

## Number of workers (R processes) to use:
numWorkers <- 8

## Parallel calculation (mclapply):
res <- mclapply(values, workerFunc, mc.cores = numWorkers)
print(unlist(res))

# On Windows the mclapply() call above fails with:
#   Error in mclapply(values, workerFunc, mc.cores = numWorkers) :
#     'mc.cores' > 1 is not supported on Windows
5.parLapply
library(parallel)

# Per-element worker: squares its input.
workerFunc <- function(n) {
  n^2
}

# Inputs to be processed
values <- 1:100

## Number of workers (R processes) to use:
numWorkers <- 8

## Set up the 'cluster' (socket-based workers)
cl <- makeCluster(spec = numWorkers, type = "PSOCK")

## Parallel calculation (parLapply):
res <- parLapply(cl = cl, X = values, fun = workerFunc)

## Shut down cluster
stopCluster(cl)

# Flatten the list of results and show it
print(unlist(res))
foreach包
> library(foreach)
> library(doParallel)
載入需要的程輯包:iterators
>
> cl<-makeCluster(no_cores)
> registerDoParallel(cl)
> foreach(exponent = 2:4,
+ .combine = c) %dopar%
+ base^exponent
[1] 16 64 256
> foreach(exponent = 2:4,
+ .combine = rbind) %dopar%
+ base^exponent
[,1]
result.1 16
result.2 64
result.3 256
> foreach(exponent = 2:4,
+ .combine = list,
+ .multicombine = TRUE) %dopar%
+ base^exponent
[[1]]
[1] 16
[[2]]
[1] 64
[[3]]
[1] 256
> foreach(exponent = 2:4,
+ .combine = list) %dopar%
+ base^exponent
[[1]]
[[1]][[1]]
[1] 16
[[1]][[2]]
[1] 64
[[2]]
[1] 256
#stopImplicitCluster()
變量的域
默認情況下,相同的本地環境中的變量是可用的:
# `base` is defined in the same environment in which foreach() is called,
# so the loop body below can use it without an explicit .export.
base <- 2

# Start two workers and register them as the %dopar% backend.
# Requires the foreach and doParallel packages to be attached.
cl <- makeCluster(2)
registerDoParallel(cl)

# Raise base to the powers 2, 3 and 4 and combine the results with c().
foreach(exponent = 2:4, .combine = c) %dopar% {
  base^exponent
}

# Shut the workers down.
stopCluster(cl)
> cl <- makeCluster(2)
> test <- function (exponent) {
+ foreach(exponent = 2:4,
+ .combine = c) %dopar%
+ base^exponent
+ }
> test()
Show Traceback
Rerun with Debug
Error in base^exponent : task 1 failed - "找不到對象'base'"
> base <- 2
> cl<-makeCluster(2)
> registerDoParallel(cl)
>
> base <- 4
> test <- function (exponent) {
+ foreach(exponent = 2:4,
+ .combine = c,
+ .export = "base") %dopar%
+ base^exponent
+ }
> test()
[1] 16 64 256
>
> stopCluster(cl)
同樣,您可以使用.packages選項加載軟件包,例如 .packages = c("rms", "mice")。 我強烈建議您始終導出所需的變量,因為它會限制在函數中封裝代碼時出現的問題。
cl <- makeCluster(4)
> registerDoParallel(cl)
> x <- iris[which(iris[,5] != "setosa"), c(1,5)]
> trials <- 10000
> ptime <- system.time({
+ r <- foreach(icount(trials), .combine=cbind) %dopar% {
+ ind <- sample(100, 100, replace=TRUE)
+ result1 <- glm(x[ind,2]~x[ind,1], family=binomial(logit))
+ coefficients(result1)
+ }
+ })[3]
> ptime
elapsed
20.01
> stime <- system.time({
+ r <- foreach(icount(trials), .combine=cbind) %do% {
+ ind <- sample(100, 100, replace=TRUE)
+ result1 <- glm(x[ind,2]~x[ind,1], family=binomial(logit))
+ coefficients(result1)
+ }
+ })[3]
> stime
elapsed
39.17
stopCluster(cl)
參考資料
http://gforge.se/2015/02/how-to-go-parallel-in-r-basics-tips/
https://stackoverflow.com/questions/24040280/parallel-computation-of-multiple-imputation-by-using-mice-r-package/27087791#27087791