我正在尝试做购物篮分析（market basket analysis），但做法上有一点变化。假设我想对不同的预定义客户细分群体分别进行购物篮分析，并针对购物篮中的每个商品得到相关的关联规则。下面是可以运行的代码：
l <- NULL
rf <- NULL
rf_temp <- NULL
options(scipen = 999)
options(digits = 10)

# Mine association rules per customer segment and per left-hand-side item.
#
# Reads (defined outside this script section):
#   ori               - data frame; column 1 is used as the transaction id,
#                       column 2 (`V2`) as the item and `V3` as the segment.
#   ori_distinct_char - the segment labels to iterate over.
#   route             - vector the left-hand-side items are taken from.
# Produces:
#   rf - data frame holding, for every (segment, item) pair that yields rules,
#        up to 50 rules with the highest confidence plus one trailing column
#        carrying the segment label; stays NULL when nothing was found.

# Per-pair results are collected in a list and bound once at the end; calling
# rbind() on a growing data frame inside the loop re-copies it on every append.
rf_parts <- list()

for (i in ori_distinct_char) {
  # Rows belonging to the current segment (which() drops NA comparisons).
  segment_rows <- ori[which(ori$V3 == as.character(i)), ]

  # One transaction per distinct value of column 1, holding column 2 items.
  subset_data <- as(
    split(as.vector(segment_rows[, 2]), as.vector(segment_rows[, 1])),
    "transactions"
  )
  food <- unique(segment_rows$V2)

  # seq_along() instead of 1:length(): with an empty `food`, 1:0 is c(1, 0)
  # and the loop would still run once for route[1].
  # NOTE(review): the items are the first length(food) entries of `route`,
  # not `food` itself - confirm that `route` is the intended source.
  for (j in route[seq_along(food)]) {
    # Support and confidence thresholds are set as low as possible so that
    # more rules are produced (the data are sparse).
    rules_food <- apriori(
      subset_data,
      parameter = list(
        supp = 0.0000001, conf = 0.0000001, minlen = 2, target = "rules"
      ),
      appearance = list(lhs = as.character(j), default = "rhs")
    )
    rules_food <- sort(rules_food, by = "confidence", decreasing = TRUE)
    rf_temp <- as(head(rules_food, 50), "data.frame")

    if (nrow(rf_temp) != 0) {
      # The cbind() call is kept verbatim: the appended column is named after
      # the deparsed expression, so rewriting it would rename the column.
      rf_parts[[length(rf_parts) + 1L]] <- cbind(rf_temp, paste(i, sep = ""))
    }
  }
}

if (length(rf_parts) > 0) {
  rf <- do.call(rbind, rf_parts)
}
我想找到一种方法让这个脚本并行运行每一种排列，即：针对不同客户群和不同食物并行地挖掘关联规则，以覆盖所有可能的排列。此外，现有的脚本太慢了——设想有 5 个细分群体和 2000 种食物可选。
更新：使用 'foreach' 循环的版本，目前进展如下：
cl <- makeCluster(3)
registerDoParallel(cl)
l <- NULL
rf <- NULL
rf_temp <- NULL
options(scipen = 999)
options(digits = 10)

# Parallel version of the per-segment / per-item rule mining.
#
# Reads (defined outside this script section): `ori`, `ori_distinct_char`
# and `route`.
# Produces: `rf`, the row-bound rule tables of every (segment, item) pair
# that yielded rules, each with a trailing segment-label column; NULL when no
# pair yielded any rule.
#
# Fixes relative to the first draft:
#   * segments are iterated by value, not by index, so the comparison with
#     ori$V3 matches actual segment labels;
#   * the inner loop variable is `j`, the name the loop body uses;
#   * each task returns its table and foreach combines the results -
#     assigning to `rf` inside %dopar% only changes a worker-local copy;
#   * only the item loop runs with %dopar%; a %dopar% nested inside another
#     %dopar% runs on workers that have no parallel backend registered;
#   * the cluster is shut down even when a task fails.
rf <- tryCatch(
  {
    per_segment <- lapply(ori_distinct_char, function(i) {
      # Rows belonging to the current segment (which() drops NA comparisons).
      segment_rows <- ori[which(ori$V3 == as.character(i)), ]

      # One transaction per distinct value of column 1, holding column 2
      # items. Built once per segment and shared by all of its item tasks.
      subset_data <- as(
        split(as.vector(segment_rows[, 2]), as.vector(segment_rows[, 1])),
        "transactions"
      )
      food <- unique(segment_rows$V2)

      # NOTE(review): the items are the first length(food) entries of
      # `route`, mirroring the sequential script - confirm that `route` is
      # the intended source rather than `food`.
      lhs_items <- as.character(route[seq_along(food)])
      if (length(lhs_items) == 0L) {
        return(NULL)
      }

      foreach(j = lhs_items, .combine = rbind, .packages = "arules") %dopar% {
        # Support and confidence thresholds are set as low as possible so
        # that more rules are produced (the data are sparse).
        rules_food <- apriori(
          subset_data,
          parameter = list(
            supp = 0.0000001, conf = 0.0000001, minlen = 2, target = "rules"
          ),
          appearance = list(lhs = j, default = "rhs")
        )
        rules_food <- sort(rules_food, by = "confidence", decreasing = TRUE)
        rules_df <- as(head(rules_food, 50), "data.frame")

        # The value of this expression is what the task returns; NULL results
        # are ignored by rbind() when the tasks are combined.
        if (nrow(rules_df) != 0) {
          # Same cbind() expression as the sequential script so the appended
          # column gets the same (deparsed) name.
          cbind(rules_df, paste(i, sep = ""))
        } else {
          NULL
        }
      }
    })
    do.call(rbind, per_segment)
  },
  finally = {
    stopCluster(cl)
    # Fall back to the sequential backend so later %dopar% calls do not try
    # to use the stopped cluster.
    registerDoSEQ()
  }
)