我需要生成 n 个变量的所有可能组合,使这些变量的总和为 100。每个变量的取值范围为 0 到 100,以 1 为步长变化。我在 R 中针对 n = 10 编写了下面的代码,得到的数据框包含所有可能的组合。但是,我希望能让 n 动态化,使用户可以在启动时把 n 作为参数传入。非常感谢任何帮助。
# Enumerate every combination of `n` variables, each taken from
# seq(0, total, by = step), whose values add up to exactly `total`.
#
# This replaces ten hard-coded nested loops, which tested all 101^10 tuples
# and appended matches to a list one at a time. The recursion below only
# builds tuples that can still reach `total`, so the number of variables
# becomes an ordinary argument.
#
# Args:
#   n:     number of variables (a whole number >= 1).
#   total: required sum of every combination (>= 0). Defaults to 100.
#   step:  spacing between admissible values (> 0). Defaults to 1.
#
# Returns:
#   A numeric matrix with `n` columns and one row per combination, in the
#   order the nested loops would visit them (first column varies slowest).
#   It has choose(total / step + n - 1, n - 1) rows. If `total` is not a
#   multiple of `step`, no combination exists and the matrix has zero rows.
generate_combinations <- function(n, total = 100, step = 1) {
  stopifnot(
    is.numeric(n), length(n) == 1L, !is.na(n), n >= 1, n == round(n),
    is.numeric(total), length(total) == 1L, !is.na(total), total >= 0,
    is.numeric(step), length(step) == 1L, !is.na(step), step > 0
  )
  n <- as.integer(n)

  # Work in whole multiples of `step` so the sum is compared exactly.
  units <- total / step
  if (abs(units - round(units)) > sqrt(.Machine$double.eps)) {
    return(matrix(numeric(0), nrow = 0L, ncol = n))
  }
  units <- as.integer(round(units))

  # All ordered ways to write `remaining` as a sum of `k` non-negative
  # integers, one per row.
  compose <- function(k, remaining) {
    if (k == 1L) {
      # The last variable has to absorb whatever is left.
      return(matrix(remaining, nrow = 1L, ncol = 1L))
    }
    pieces <- lapply(seq.int(0L, remaining), function(first) {
      # deparse.level = 0 keeps the argument name out of the column names.
      cbind(first, compose(k - 1L, remaining - first), deparse.level = 0)
    })
    do.call(rbind, pieces)
  }

  compose(n, units) * step
}

# The number of variables is taken from the first command-line argument, e.g.
#   Rscript <script> 4
# Without an argument nothing is enumerated: the former hard-coded case of
# n = 10 has choose(109, 9) (about 4.26e12) rows and cannot be materialised.
cli_args <- commandArgs(trailingOnly = TRUE)
if (length(cli_args) >= 1L) {
  finaldata <- as.data.frame(
    generate_combinations(as.numeric(cli_args[[1L]]))
  )
} else {
  message("usage: Rscript <script> <n>  (n = number of variables summing to 100)")
}
答案 0 :(得分:1)
# List every ordered way of writing `n` as a sum of `k` non-negative integers.
#
# Args:
#   n: the total to split.
#   k: the number of parts; any k <= 1 yields the single result list(n).
#
# Returns:
#   A list of vectors of length `k`, each summing to `n`. The vectors are
#   grouped by their last element, which runs from `n` down to 0.
ptn <- function(n, k) {
  if (k <= 1L) {
    return(list(n))
  }
  prefix_totals <- seq_len(n + 1L) - 1L
  groups <- vector("list", length(prefix_totals))
  for (idx in seq_along(prefix_totals)) {
    prefix_total <- prefix_totals[[idx]]
    # Split `prefix_total` over the first k - 1 parts, then append the rest
    # of `n` as the final part.
    groups[[idx]] <- lapply(
      ptn(prefix_total, k - 1L),
      function(prefix) c(prefix, n - prefix_total)
    )
  }
  do.call(c, groups)
}
演示:
# Demo of ptn(n, k) on small inputs. The `##` lines record what each call
# prints when entered at the console.

# One part: the only split of n is n itself.
ptn(1, 1)
## [[1]]
## [1] 1
##
ptn(2, 1)
## [[1]]
## [1] 2
##

# Two parts.
ptn(1, 2)
## [[1]]
## [1] 0 1
##
## [[2]]
## [1] 1 0
##
ptn(2, 2)
## [[1]]
## [1] 0 2
##
## [[2]]
## [1] 1 1
##
## [[3]]
## [1] 2 0
##
ptn(3, 2)
## [[1]]
## [1] 0 3
##
## [[2]]
## [1] 1 2
##
## [[3]]
## [1] 2 1
##
## [[4]]
## [1] 3 0
##

# Three parts.
ptn(3, 3)
## [[1]]
## [1] 0 0 3
##
## [[2]]
## [1] 0 1 2
##
## [[3]]
## [1] 1 0 2
##
## [[4]]
## [1] 0 2 1
##
## [[5]]
## [1] 1 1 1
##
## [[6]]
## [1] 2 0 1
##
## [[7]]
## [1] 0 3 0
##
## [[8]]
## [1] 1 2 0
##
## [[9]]
## [1] 2 1 0
##
## [[10]]
## [1] 3 0 0
##
生成您想要的划分集合(即把 100 拆分成 10 个部分)是不切实际的。即使把 100 拆分成 5 个部分也已经很勉强了:
# Time a full enumeration of 100 split into 5 parts. The `##` lines record
# the output of the original run.
system.time({
  x <- ptn(100, 5)
})
## user system elapsed
## 32.594 0.141 32.790

# Number of vectors generated.
length(x)
## [1] 4598126

# Every generated vector sums to 100.
system.time({
  print(unique(sapply(x, sum)))
})
## [1] 100
## user system elapsed
## 6.938 0.063 7.004

# No vector was generated twice.
length(unique(x))
## [1] 4598126
此外,我还编写了一个递归计算划分集合大小的函数,它不需要承担实际生成该集合的 CPU 或内存开销。注意:缓存必不可少,否则 CPU 开销将与完整生成算法相当。
# Count the ordered ways of writing `n` as a sum of `k` non-negative
# integers, without generating them.
#
# Args:
#   n:     the total to split.
#   k:     the number of parts; any k <= 1 counts as exactly one way.
#   cache: environment memoising results under the key "<n>/<k>". A fresh
#          one is created per top-level call and shared by the recursion.
#
# Returns:
#   The count as a single number.
ptnSize <- function(n, k, cache = new.env()) {
  if (k <= 1L) {
    return(1)
  }
  key <- paste0(n, '/', k)
  if (!is.null(cache[[key]])) {
    return(cache[[key]])
  }
  # One sub-count per possible total of the first k - 1 parts.
  prefix_totals <- seq_len(n + 1L) - 1L
  sub_counts <- lapply(
    prefix_totals,
    function(x) ptnSize(x, k - 1L, cache)
  )
  cache[[key]] <- do.call(sum, sub_counts)
  cache[[key]]
}
演示:
# Demo of ptnSize(n, k). The `##` lines record what each call prints when
# entered at the console.

# The same small inputs as the ptn() demo.
ptnSize(1, 1)
## [1] 1
ptnSize(2, 1)
## [1] 1
ptnSize(1, 2)
## [1] 2
ptnSize(2, 2)
## [1] 3
ptnSize(3, 2)
## [1] 4
ptnSize(3, 3)
## [1] 10

# 100 split into 5 parts, then into 10 parts.
ptnSize(100, 5)
## [1] 4598126
ptnSize(100, 10)
## [1] 4.263422e+12
正如我们所看到的,您需要的划分集合非常大。我估计存储它需要数百 TB 的内存。
答案 1 :(得分:0)
# Build every ordered way of writing `n` as a sum of `k` non-negative
# integers, one per matrix row.
#
# Args:
#   n: the total to split. A negative value emits the message "error: n<0"
#      and returns NA.
#   k: the number of parts; must be at least 1.
#
# Returns:
#   A matrix with `k` columns whose rows each sum to `n`. Rows are grouped by
#   the last column in ascending order; within a group the layout is that of
#   parti(n - last, k - 1). The matrix carries no dimnames.
#
# Raises:
#   An error when `k` is below 1, which would otherwise recurse without end.
parti <- function(n, k) {
  if (n < 0) {
    message("error: n<0")
    return(NA)
  }
  if (k < 1) {
    stop("k must be at least 1", call. = FALSE)
  }
  if (k == 1) {
    return(matrix(n, 1, 1))
  }
  # One block per value of the last part, bound together once at the end
  # rather than growing the matrix with rbind() inside a loop.
  # as.numeric() keeps the result double for k > 1 whatever the type of `n`.
  blocks <- lapply(as.numeric(0:n), function(last) {
    # deparse.level = 0 stops the argument name becoming a column name.
    cbind(parti(n - last, k - 1), last, deparse.level = 0)
  })
  do.call(rbind, blocks)
}

# Demo: the 21 ways of splitting 5 into 3 parts.
parti(5, 3)
结果:
> parti(5, 3)
i
[1,] 5 0 0
[2,] 4 1 0
[3,] 3 2 0
[4,] 2 3 0
[5,] 1 4 0
[6,] 0 5 0
[7,] 4 0 1
[8,] 3 1 1
[9,] 2 2 1
[10,] 1 3 1
[11,] 0 4 1
[12,] 3 0 2
[13,] 2 1 2
[14,] 1 2 2
[15,] 0 3 2
[16,] 2 0 3
[17,] 1 1 3
[18,] 0 2 3
[19,] 1 0 4
[20,] 0 1 4
[21,] 0 0 5
对于您的情况(n = 100,k = 10),由于划分的数量极其庞大,您会遇到内存和运行时间方面的问题!