我有一列名为 Orders 的数据。我想把这些订单分成若干组,使每组内的订单总和接近 300。下面是输入。
**Orders**
100
198
50
40
215
296
输出应类似于
Orders Group
100 1
198 1
50 2
40 2
215 2
296 3
这只是示例数据。实际数据量非常庞大。可以使用 R 完成此操作吗?
答案 0 :(得分:2)
解决该问题的函数见下文,这里先给出结果:
# Example calls on the six sample orders with a target sum of 300.
# The commented lines under each call show the printed matrix
# (columns: orders, group).
# NOTE(review): `orders` is presumably the six-element sample vector
# quoted elsewhere in the answer -- confirm before running.
find_grouping(orders, 300L)
# orders group
# [1,] 100 1
# [2,] 198 1
# [3,] 50 2
# [4,] 40 2
# [5,] 215 2
# [6,] 296 3
allocate_groups(orders, 300L, 3L) # third argument <-> max. num. of groups
# orders group
# [1,] 100 3
# [2,] 198 3
# [3,] 50 2
# [4,] 40 2
# [5,] 215 2
# [6,] 296 1
# bigger vector
# Draw 15 distinct values from 1..300 with a fixed seed so the example is
# reproducible, then run both grouping functions on them.
set.seed(123)
orders <- sample(1:300, 15)
find_grouping(orders, 300L)
# orders group
# [1,] 87 2
# [2,] 236 2
# [3,] 122 3
# [4,] 263 4
# [5,] 279 5
# [6,] 14 9
# [7,] 156 6
# [8,] 262 7
# [9,] 162 8
# [10,] 133 8
# [11,] 278 9
# [12,] 132 10
# [13,] 196 1
# [14,] 165 10
# [15,] 30 7
# Same data, but restricted to 3 groups.
allocate_groups(orders, 300L, 3L)
# orders group
# [1,] 87 1
# [2,] 236 2
# [3,] 122 3
# [4,] 263 3
# [5,] 279 1
# [6,] 14 2
# [7,] 156 3
# [8,] 262 3
# [9,] 162 2
# [10,] 133 1
# [11,] 278 2
# [12,] 132 2
# [13,] 196 1
# [14,] 165 1
# [15,] 30 3
其中第一组示例使用的数据为 orders = c(100L, 198L, 50L, 40L, 215L, 296L)。
考虑到需要指定组数这一附加约束,下面是一个新函数:
#' Greedily distribute orders over a fixed number of groups
#'
#' Orders are processed from largest to smallest. Each order is placed in the
#' group whose total, after adding the order, is closest to `num` without
#' exceeding it. If no group can take the order without exceeding `num`, the
#' order goes to the group with the smallest current total.
#'
#' @param orders Numeric vector of order sizes. `NA`s are dropped by `sort()`.
#' @param num Target total per group (a soft upper bound).
#' @param group_num Number of groups to fill.
#' @return A list of length `group_num`; element `i` holds the orders placed
#'   in group `i`, in placement order. A group that received no order holds a
#'   single `NA_integer_` placeholder.
create_groups <- function(orders, num, group_num) {
  stopifnot(
    length(num) == 1L, !is.na(num),
    length(group_num) == 1L, !is.na(group_num), group_num >= 0
  )
  pending <- sort(orders, decreasing = TRUE)
  if (length(pending) > 0L && group_num < 1) {
    stop("`group_num` must be at least 1 when there are orders to place.",
         call. = FALSE)
  }

  groups <- rep(list(NA_integer_), group_num)
  # Running totals are kept as doubles: truncating them to integer would make
  # a group holding e.g. 9.6 look like 9 and accept an order it cannot fit.
  totals <- numeric(group_num)
  filled <- logical(group_num)

  for (k in pending) {
    new_totals <- totals + k
    fits <- which(new_totals <= num)
    index <- if (length(fits) > 0L) {
      # Among the groups that stay within `num`, take the one ending closest.
      fits[which.min(abs(new_totals[fits] - num))]
    } else {
      # Nothing fits: overflow the currently emptiest group.
      which.min(totals)
    }
    # The first order replaces the NA placeholder; later ones are appended.
    groups[[index]] <- if (filled[index]) c(groups[[index]], k) else k
    filled[index] <- TRUE
    totals[index] <- new_totals[index]
  }
  groups
}
#' Assign every order to one of `group_num` groups
#'
#' Calls `create_groups()` and maps its per-group result back onto the
#' original positions of `orders`.
#'
#' @param orders Numeric vector of order sizes.
#' @param num Target total per group, forwarded to `create_groups()`.
#' @param group_num Number of groups, forwarded to `create_groups()`.
#' @return A matrix with one row per element of `orders` and two columns,
#'   `orders` and `group` (the group index). `NA` orders get an `NA` group.
allocate_groups <- function(orders, num, group_num) {
  groups <- create_groups(orders, num, group_num)
  group_ids <- rep(seq_along(groups), lengths(groups))
  placed <- unlist(groups)

  # Empty groups are represented by an NA placeholder; ignore those slots.
  has_value <- !is.na(placed)
  placed <- placed[has_value]
  group_ids <- group_ids[has_value]

  # Pair the i-th smallest order with the i-th smallest placed value. Because
  # order() is stable, repeated values are paired occurrence by occurrence,
  # whereas match() would map every duplicate to the first occurrence's group.
  known <- which(!is.na(orders))
  group <- rep(NA_integer_, length(orders))
  group[known[order(orders[known])]] <- group_ids[order(placed)]

  cbind(orders, group = group)
}
# results above
增加的约束实际上使问题变得更简单:我们要把 orders 放进最多 n 个“抽屉”,并使每个抽屉的总和尽可能接近 num。
这是该函数的完整代码:
# Greedily group `orders` so that each group's sum lands close to `num`,
# without fixing the number of groups in advance.
#
# Arguments:
#   orders - vector of order sizes
#   num    - target sum per group
# Returns: a matrix built by cbind() with columns `orders` and `group`.
# Requires the RcppAlgos package.
find_grouping <- function (orders, num) {
# Every 2-element combination of `orders`; constraintFun = 'sum' is expected
# to append the pair sum as a third column (columns 1-2 hold the pair).
combs2 <- RcppAlgos::comboGeneral(orders, 2L, constraintFun = 'sum')
# Fourth column `close`: absolute distance of each pair sum from `num`.
combs2 <- cbind.data.frame(combs2,close=abs(num - combs2[,3]))
out <- integer(length(orders))
# Values already placed as part of a multi-element group.
skip <- NA_integer_
group <- 1L
for (k in seq_along(out)) {
val1 <- orders[k]
if (val1 %in% skip) next
# Rows of combs2 in which val1 is one of the two members.
ind1 <- (.subset2(combs2,1L) == val1) | (.subset2(combs2,2L) == val1)
# Among those rows, the one whose sum is closest to num ...
ind2 <- (which.min(.subset2(combs2, 4L)[ind1]))
# ... translated back to a row index of combs2.
ind3 <- which(ind1)[ind2]
# Sum of the best pair containing val1.
val2 <- .subset2(combs2, 3L)[ind3]
# val1 on its own is at least as close to num as its best pair:
# give it a group of its own.
if (abs(num-val1) <= abs(num-val2)) {
out[k] <- group
group <- group + 1L
next
}
# The two members of the best pair.
intList <- as.integer(combs2[ind3,1:2])
# NOTE(review): this removes only the current pair from `orders`; values
# placed in earlier groups appear to remain candidates -- confirm intent.
ordersRemain <- setdiff(orders, intList)
# Adding even the smallest remaining order would not bring the pair closer
# to num: the pair forms a complete group.
if (abs(num-val2) <= abs(num-val2-min(ordersRemain))) {
skip <- c(skip, intList)
out[orders %in% intList] <- group
group <- group + 1
next
}
# Otherwise keep adding remaining orders to the pair; val3 tracks the
# running sum of the growing group.
val3 <- val2
cond <- FALSE
while (!cond) {
# NOTE(review): the candidate is chosen relative to val2 (the original pair
# sum), not the running sum val3 -- possibly unintended, verify.
toAdd <- which.min(abs(num - (val2 + ordersRemain)))
val3 <- val3 + ordersRemain[toAdd]
intList <- c(intList, ordersRemain[toAdd])
ordersRemain <- ordersRemain[-toAdd]
# NOTE(review): the right-hand side also uses val2 rather than val3 -- verify.
cond <- abs(num-val3) <= abs(num-val2-min(ordersRemain))
}
# Record the finished group and mark its members as used.
skip <- c(skip, intList)
out[orders %in% intList] <- group
group <- group + 1
}
cbind(orders,group=out)
}
第一步是使用 RcppAlgos::comboGeneral 生成订单的所有两两组合(这是一种非常快速的方法):
# num
# Enumerate every pair of orders; with constraintFun = 'sum' the pair sum is
# returned as a third column.
combs <- RcppAlgos::comboGeneral(orders, 2L, constraintFun = 'sum')
# Append `close`: the absolute distance of each pair sum from the target `num`.
combs <- cbind.data.frame(combs,close=abs(num - combs[,3])) # check how far from num are the combinations
# 1 2 3 close
# 1 100 198 298 2
# 2 100 50 150 150
# 3 100 40 140 160
# 4 100 215 315 15
# ...
接下来有几种做法。我选择了循环:在每次迭代中,为当前值 orders[k] 找到最佳组合(即总和最接近 num 的组合),然后记住该组合(例如 100; 198)并为其指定 group 值。
答案 1 :(得分:0)
这解决的是您所提问题的一个变体:各组的总和不得超过目标总和。
# Bin-packing variant: BBmisc::binPack() returns a bin index for each order,
# given the maximum total allowed per bin.
library(BBmisc)
library(dplyr)

# Maximum total allowed per group.
bin.capacity <- 305

# Sample orders plus the bin (group) index assigned to each of them.
df <- mutate(
  data.frame(Orders = c(100, 198, 50, 40, 215, 296)),
  Group = BBmisc::binPack(Orders, bin.capacity)
)
> df
Orders Group
1 100 3
2 198 3
3 50 2
4 40 2
5 215 2
6 296 1
对于bin.capacity = 300:
> df
Orders Group
1 100 3
2 198 3
3 50 2
4 40 4
5 215 2
6 296 1