我对R编程比较新,所以如果这个问题太基础,我会道歉。我的交易显示了六种不同类型产品的收入。有三年的交易。我的目标是找出所有不同产品组合的产品总和,每年为2^6 - 1 = 64 - 1 = 63
。意思是,我会63*3 = 189
组合。
为了简单起见,我仅使用三个变量创建了测试数据,因为我使用while
循环编写了一年的程序,这很糟糕。我的目标是展示我想要实现的目标。尽管如此,我已经在下面的原始文件中发布了随机样本。
以下是只有三个变量Car
,Tire
以及Services
和while
循环的测试数据,以显示我在寻找的内容:
dput(Sample_File)
structure(list(Order.ID = c(171, 173, 132, 174, 132, 174, 132,
174, 174), Fiscal.Year = c(2017, 2016, 2016, 2016, 2016, 2016,
2016, 2016, 2018), Car = c(2, 2, 3, 1, 0, 0, 0, 0, 1), Tire = c(0,
0, 0, 1, 0, 1, 0, 1, 1), Services = c(3, 1, 4, 0, 4, 1, 4, 0,
0)), .Names = c("Order.ID", "Fiscal.Year", "Car", "Tire", "Services"
), row.names = c(NA, 9L), class = "data.frame")
这是我的代码:
i<-1
Csum <- matrix(rep(0,21),nrow = 7,ncol = 3)
# Row 1 is used when C is ON; T is ON ; S is ON
# Row 2 is used when C is ON; T is ON ; S is OFF
# Row 3 is used when C is ON; T is OFF ; S is ON
# Row 4 is used when C is OFF; T is ON ; S is ON
# Row 5 is used when C is ON; T is OFF ; S is OFF
# Row 6 is used when C is OFF; T is ON ; S is OFF
# Row 7 is used when C is OFF; T is OFF ; S is ON
while (i <= length(Sample_File$Order.ID))
{
if (Sample_File$Fiscal.Year[i]!=2016)
{
i<-i+1
next
}
if (Sample_File$Car[i]!=0 & Sample_File$Tire[i]!=0 & Sample_File$Services[i]!=0)#1
{
Csum[1,1] <- Csum[1,1] + Sample_File$Car[i]
Csum[1,2] <- Csum[1,2] + Sample_File$Tire[i]
Csum[1,3] <- Csum[1,3] + Sample_File$Services[i]
}
else if (Sample_File$Car[i]!=0 & Sample_File$Tire[i]!=0 & Sample_File$Services[i]==0) #2
{
Csum[2,1] <- Csum[2,1] + Sample_File$Car[i]
Csum[2,2] <- Csum[2,2] + Sample_File$Tire[i]
Csum[2,3] <- Csum[2,3] + 0
}
else if(Sample_File$Car[i]!=0 & Sample_File$Tire[i]==0 & Sample_File$Services[i]!=0) #3
{
Csum[3,1] <- Csum[3,1] + Sample_File$Car[i]
Csum[3,2] <- Csum[3,2] + 0
Csum[3,3] <- Csum[3,3] + Sample_File$Services[i]
}
else if(Sample_File$Car[i]==0 & Sample_File$Tire[i]!=0 & Sample_File$Services[i]!=0) #4
{
Csum[4,1] <- Csum[4,1] + 0
Csum[4,2] <- Csum[4,2] + Sample_File$Tire[i]
Csum[4,3] <- Csum[4,3] + Sample_File$Services[i]
}
else if(Sample_File$Car[i]!=0 & Sample_File$Tire[i]==0 & Sample_File$Services[i]==0) #5
{
Csum[5,1] <- Csum[5,1] + Sample_File$Car[i]
Csum[5,2] <- Csum[5,2] + 0
Csum[5,3] <- Csum[5,3] + 0
}
else if(Sample_File$Car[i]==0 & Sample_File$Tire[i]!=0 & Sample_File$Services[i]==0)#6
{
Csum[6,1] <- Csum[6,1] + 0
Csum[6,2] <- Csum[6,2] + Sample_File$Tire[i]
Csum[6,3] <- Csum[6,3] + 0
}
else #7
{
Csum[7,1] <- Csum[7,1] + 0
Csum[7,2] <- Csum[7,2] + 0
Csum[7,3] <- Csum[7,3] + Sample_File$Services[i]
}
i<-i+1
}
我编写的代码只处理了一年,因为将代码复制三年非常痛苦。我正在寻找一个解决方案,可以创建3个数据框的列表,每个数据框三年。
这是一个大小为10的随机样本,其中有六个来自原始文件的变量。
dput(Sample_File_Random)
structure(list(Order.ID = c(171, 173, 132, 174, 169, 175, 163,
186, 178, 121), Fiscal.Year = c(2016, 2016, 2017, 2016, 2015,
2016, 2015, 2015, 2015, 2017), Car = c(2, 0, 3, 0, 0, 0, 0, 5346.25,
0, 0), Tire = c(0, 0, 0, 8691.55800460666, 3198, 5, 2, 0, 2,
3282.18), Services = c(3, 0, 4, 0, 0, 0, 0, 0, 0, 0), Insurance = c(4,
0, 0, 4, 0, 4, 0, 0, 0, 0), Accessories = c(94.3, 3749.8, 9308.65,
0, 2, 0, 1, 633.75, 51.44, 0), Finance = c(0, 0, 0, 4, 0, 14800,
0, 0, 0, 0)), .Names = c("Order.ID", "Fiscal.Year", "Car", "Tire",
"Services", "Insurance", "Accessories", "Finance"), row.names = c(NA,
10L), class = "data.frame")
我真的被困了所以我真诚地感谢任何有关矢量化的帮助..
@ Ronak shah的请求:这是Sample_File_Random
Output_File
Fiscal.Year Car Tire Services Insurance Accessories Finance
1 2015 0.00 3202.000 0 0 54.44 0
2 2015 5346.25 0.000 0 0 633.75 0
3 2016 2.00 0.000 3 4 94.30 0
4 2016 0.00 0.000 0 0 3749.80 0
5 2016 0.00 8696.558 0 8 0.00 14804
6 2017 3.00 0.000 4 0 9308.65 0
7 2017 0.00 3282.180 0 0 0.00 0
答案 0 :(得分:1)
这是一个紧凑的&amp;富有表现力的dplyr
解决方案,分三步进行:
以下是执行此操作的代码:
df_foo %>%
# 1. create the combinations of whether each of the
# products is in the basket or not
mutate_each(
funs(In_Basket = . > 0), Car:Services
) %>%
# 2. group by the year and the basket service indicators
group_by_(.dots = c("Fiscal.Year", grep("_In_Basket", names(.), value = TRUE))) %>%
# 3. sum the service values
summarise_each(
funs(sum(., na.rm = TRUE)), Car:Services
)
这给出了输出:
Source: local data frame [7 x 7]
Groups: Fiscal.Year, Car_In_Basket, Tire_In_Basket [?]
Fiscal.Year Car_In_Basket Tire_In_Basket Services_In_Basket Car Tire Services
<dbl> <lgl> <lgl> <lgl> <dbl> <dbl> <dbl>
1 2016 FALSE FALSE TRUE 0 0 8
2 2016 FALSE TRUE FALSE 0 1 0
3 2016 FALSE TRUE TRUE 0 1 1
4 2016 TRUE FALSE TRUE 5 0 5
5 2016 TRUE TRUE FALSE 1 1 0
6 2017 TRUE FALSE TRUE 2 0 3
7 2018 TRUE TRUE FALSE 1 1 0
答案 1 :(得分:0)
这里有一个很好的挑战....
Using your dataset that I called test. I chose to approach this with matrices.
names<-colnames(test[3:8])
library(combinat)
one<-t(combn(names,1))
two<-t(combn(names,2))
three<-t(combn(names,3))
four<-t(combn(names,4))
five<-t(combn(names,5))
six<-t(combn(names,6))
library(plyr)
myset<-unname(rbind.fill.matrix(one,two,three,four,five,six))
head(myset,3); tail(myset,3)
给出以下内容:
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] "Car" NA NA NA NA NA
[2,] "Tire" NA NA NA NA NA
[3,] "Services" NA NA NA NA NA
[,1] [,2] [,3] [,4] [,5] [,6]
[61,] "Car" "Services" "Insurance" "Accessories" "Finance" NA
[62,] "Tire" "Services" "Insurance" "Accessories" "Finance" NA
[63,] "Car" "Tire" "Services" "Insurance" "Accessories" "Finance"
使用dplyr按年获得总和:
library(dplyr)
testsums<- test %>% select(-Order.ID) %>% group_by(Fiscal.Year) %>% summarise_each(funs(mean))
testsums
A tibble: 3 × 7
Fiscal.Year Car Tire Services Insurance Accessories Finance
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2015 1336.562 800.50 0.00 0 172.0475 0
2 2016 0.500 2174.14 0.75 3 961.0250 3701
3 2017 1.500 1641.09 2.00 0 4654.3250 0
创建1&0和3的matix,乘以相同六个变量的年度和的向量。
mult.matrix<-myset
mult.matrix[!is.na(mult.matrix)]<-1
mult.matrix[is.na(mult.matrix)]<-0
class(mult.matrix) <- "numeric"
head(mult.matrix,3);tail(mult.matrix,3)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 0 0 0 0 0
[2,] 1 0 0 0 0 0
[3,] 1 0 0 0 0 0
[,1] [,2] [,3] [,4] [,5] [,6]
[61,] 1 1 1 1 1 0
[62,] 1 1 1 1 1 0
[63,] 1 1 1 1 1 1
将年度总和转换为矩阵表示法。将它乘以mult.matrix。将3个新列绑定到原始组合数据集。
year_sums<-unname(as.matrix(testsums[1:3,2:7]))
all_sums<-mult.matrix %*% t(year_sums)
myset<-unname(rbind.fill.matrix(one,two,three,four,five,six))
myset<-cbind(myset,all_sums)
head(myset,5); tail(myset,5)
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
[1,] "Car" NA NA NA NA NA "1336.5625" "0.5" "1.5"
[2,] "Tire" NA NA NA NA NA "1336.5625" "0.5" "1.5"
[3,] "Services" NA NA NA NA NA "1336.5625" "0.5" "1.5"
[4,] "Insurance" NA NA NA NA NA "1336.5625" "0.5" "1.5"
[5,] "Accessories" NA NA NA NA NA "1336.5625" "0.5" "1.5"
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
[59,] "Car" "Tire" "Services" "Accessories" "Finance" NA "2309.11" "3139.41450115167" "6298.915"
[60,] "Car" "Tire" "Insurance" "Accessories" "Finance" NA "2309.11" "3139.41450115167" "6298.915"
[61,] "Car" "Services" "Insurance" "Accessories" "Finance" NA "2309.11" "3139.41450115167" "6298.915"
[62,] "Tire" "Services" "Insurance" "Accessories" "Finance" NA "2309.11" "3139.41450115167" "6298.915"
[63,] "Car" "Tire" "Services" "Insurance" "Accessories" "Finance" "2309.11" "6840.41450115166" "6298.915"
这可以清理很多。我选择了完成思考过程。您现在可以获取最终矩阵,将其转换为数据帧,重命名标题等...