以下 `for()` 循环遍历 `NAMES` 中的每个字符名称,从 `crsp1` 中提取对应的数据并格式化为 `xts()` 对象,然后合并保存到 `DATA` 中:
# Build one xts column per name in NAMES and outer-join the columns into DATA.
# Relies on objects defined elsewhere: crsp1 (column 1 is compared against the
# name, column 2 supplies the values, column 7 supplies the dates) and NAMES
# (names are read from row 1). The xts package must already be attached.
DATA <- xts()
system.time(
  # seq_len() gives an empty sequence when NAMES has no columns, whereas
  # 1:dim(NAMES)[2] would iterate over c(1, 0).
  for (i in seq_len(ncol(NAMES))) {
    # NAMES holds factors; convert once so the comparison and the column
    # label both use the text of the name.
    company <- as.character(NAMES[1, i])
    nudata <- crsp1[which(crsp1[, 1] == company), ]
    z <- xts(
      coredata(nudata[, 2]),
      order.by = round(as.POSIXct(nudata[, 7], format = "%y-%m-%d"),
                       units = "days")
    )
    colnames(z) <- company
    # Outer join keeps every date seen so far; missing values become NA.
    DATA <- merge.xts(DATA, z, join = "outer")
  }
)
这段代码大约需要 239.21 秒。我相信这个 `for()` 循环可以改进,但我不清楚具体该怎么做。我研究了 `library(doParallel)`,并写出了以下代码:
# CRISP: the loop body wrapped as a function of the column index `i`.
#
# Builds the xts series for the i-th name in NAMES and outer-joins it with
# DATA. crsp1, NAMES and DATA are not parameters; they are looked up as free
# variables. The final assignment to DATA is local to the call, so the merged
# object only reaches the caller as the (invisible) value of that assignment.
CRISP <- function(i) {
  company <- NAMES[1, i]
  hits <- which(crsp1[, 1] == paste(company))
  nudata <- crsp1[hits, ]
  values <- coredata(nudata[, 2])
  stamps <- round(
    as.POSIXct(nudata[, 7], format = "%y-%m-%d"),
    units = c("days")
  )
  z <- xts(values, order.by = stamps)
  colnames(z) <- company
  DATA <- merge.xts(DATA, z, join = "outer")
}
然后我使用 `foreach()` 并行运行它:
# Install doParallel only when it is missing (an unconditional
# install.packages() would reinstall it on every run), then attach it and
# register a two-worker cluster as the foreach backend.
if (!requireNamespace("doParallel", quietly = TRUE)) {
  install.packages("doParallel")
}
library("doParallel")
cl <- makeCluster(2)
registerDoParallel(cl)
# Run CRISP() once per column of NAMES on the workers and time the whole call.
# seq_len() gives an empty sequence when NAMES has no columns.
DATA <- xts()
system.time(
  foreach(i = seq_len(ncol(NAMES)), .packages = "xts") %dopar% CRISP(i)
)
但是,大约70秒后会打印以下错误:
Error in CRISP(i) : task 1 failed - "object 'DATA' not found"
我不明白为什么找不到 `DATA`?
NAMES:
# Example input: a one-row data frame of company names. Each column (X1..X5)
# is a single-level factor holding one name.
NAMES <- data.frame(
  X1 = factor("AMERICAN CAR & FDRY CO"),
  X2 = factor("ALASKA JUNEAU GOLD MNG CO"),
  X3 = factor("AMERICAN SAFETY RAZOR CORP"),
  X4 = factor("AMERICAN BRAKE SHOE & FDRY"),
  X5 = factor("ABITIBI POWER & PAPER LTD")
)
crsp1:
# Example input: a plain list of seven equal-length columns. The values are
# laid out one six-row group per line.
crsp1 <- list(
  COMNAM = rep(c(31, 12, 61, 28, 2), each = 6),
  RET = c(
    45553, 22625, 31216, 2897, 21995, 21995,
    45553, 18171, 21995, 36821, 14301, 14530,
    45553, 24793, 1409, 35194, 32919, 30210,
    45553, 1, 26123, 4148, 26123, 40785,
    45553, 6063, 29673, 9213, 26222, 28048
  ),
  RETX = c(
    45262, 22610, 31102, 2875, 21989, 21989,
    45262, 18164, 21989, 36626, 14281, 14511,
    45262, 24761, 1393, 35018, 32778, 30102,
    45262, 1, 26076, 4118, 26076, 40534,
    45262, 6028, 29576, 9177, 26173, 27972
  ),
  vwretd = c(
    NA, 0.005893, 0.001277, -0.003984, -0.000172, 0.007211,
    0.001277, -0.003984, -0.000172, 0.007211, -0.000804, 0.003384,
    NA, 0.005893, 0.001277, -0.003984, -0.000172, 0.007211,
    NA, 0.005893, 0.001277, -0.003984, -0.000172, 0.007211,
    NA, 0.005893, 0.001277, -0.003984, -0.000172, 0.007211
  ),
  ewretd = c(
    NA, 0.009516, 0.00578, -0.001927, 0.001182, 0.008453,
    0.00578, -0.001927, 0.001182, 0.008453, -0.001689, 0.003312,
    NA, 0.009516, 0.00578, -0.001927, 0.001182, 0.008453,
    NA, 0.009516, 0.00578, -0.001927, 0.001182, 0.008453,
    NA, 0.009516, 0.00578, -0.001927, 0.001182, 0.008453
  ),
  # Every entry of this column is NA.
  sprtrn = rep(NA, 30),
  DATE = c(
    -16072, -16070, -16068, -16067, -16066, -16065,
    -16068, -16067, -16066, -16065, -16064, -16063,
    -16072, -16070, -16068, -16067, -16066, -16065,
    -16072, -16070, -16068, -16067, -16066, -16065,
    -16072, -16070, -16068, -16067, -16066, -16065
  )
)
答案 0 :(得分:2)
您当前的写法无法直接并行化,因为每次迭代都要把 z 合并到同一个 DATA 对象中。我建议先并行计算出所有的 z,等 foreach 结束之后再把它们与 DATA 合并;foreach 本身会返回每次迭代的结果。
# CRISP: build and return the xts series for the i-th name in NAMES.
#
# i: column index into NAMES.
# Returns a one-column xts object labelled with that name. Nothing is merged
# here; combining the series is left to the caller. crsp1 and NAMES are looked
# up as free variables rather than passed in.
CRISP <- function(i) {
  company <- NAMES[1, i]
  hits <- which(crsp1[, 1] == paste(company))
  nudata <- crsp1[hits, ]
  values <- coredata(nudata[, 2])
  stamps <- round(
    as.POSIXct(nudata[, 7], format = "%y-%m-%d"),
    units = c("days")
  )
  z <- xts(values, order.by = stamps)
  colnames(z) <- company
  z
}
和
# Compute every per-name series on the workers, keep the list of results in
# all_z, and time the call.
# The assignment must use `<-`: inside a call, `all_z = ...` is parsed as a
# named argument to system.time(), which fails as an unused argument and never
# creates all_z.
system.time(
  all_z <- foreach(i = seq_len(ncol(NAMES)), .packages = "xts") %dopar% CRISP(i)
)
然后将 `all_z` 中的各个序列与 `DATA` 合并,例如 `DATA <- do.call(merge, all_z)`。