如何找到数字的组合,使其总和最接近给定的目标数字?

时间:2019-01-12 07:17:27

标签: r excel

我有一列名为 Orders 的数据。我想把这些订单分成若干组,使每组中订单的总和尽量接近 300。下面是输入。

**Orders**
100
198
50
40
215
296

输出应类似于

Orders  Group
100     1
198     1
50      2
40      2
215     2
296     3

这只是示例数据,实际的数据量非常庞大。可以使用 R 完成此操作吗?

2 个答案:

答案 0 :(得分:2)

结果

解决该问题的函数见下文,这里先给出结果

# Small example; `orders` here is c(100L, 198L, 50L, 40L, 215L, 296L) and the
# target sum per group is 300.
find_grouping(orders, 300L)
#      orders group
# [1,]    100     1
# [2,]    198     1
# [3,]     50     2
# [4,]     40     2
# [5,]    215     2
# [6,]    296     3

allocate_groups(orders, 300L, 3L)    # third argument = maximum number of groups
#      orders group
# [1,]    100     3
# [2,]    198     3
# [3,]     50     2
# [4,]     40     2
# [5,]    215     2
# [6,]    296     1 

# Bigger vector: 15 values drawn without replacement from 1:300.
# NOTE(review): the values listed below were presumably generated with
# R < 3.6.0; from R 3.6.0 on, sample() uses a different default algorithm, so
# set.seed(123) yields other numbers unless
# RNGkind(sample.kind = "Rounding") is set first -- confirm before comparing.
set.seed(123)
orders <- sample(1:300, 15)
find_grouping(orders, 300L)
#       orders group
#  [1,]     87     2
#  [2,]    236     2
#  [3,]    122     3
#  [4,]    263     4
#  [5,]    279     5
#  [6,]     14     9
#  [7,]    156     6
#  [8,]    262     7
#  [9,]    162     8
# [10,]    133     8
# [11,]    278     9
# [12,]    132    10
# [13,]    196     1
# [14,]    165    10
# [15,]     30     7
# Same data, but forced into at most 3 groups.
allocate_groups(orders, 300L, 3L)
#       orders group
#  [1,]     87     1
#  [2,]    236     2
#  [3,]    122     3
#  [4,]    263     3
#  [5,]    279     1
#  [6,]     14     2
#  [7,]    156     3
#  [8,]    262     3
#  [9,]    162     2
# [10,]    133     1
# [11,]    278     2
# [12,]    132     2
# [13,]    196     1
# [14,]    165     1
# [15,]     30     3

前两次调用所使用的数据为 orders = c(100L, 198L, 50L, 40L, 215L, 296L)


编辑:新函数

考虑到需要指定组数这一附加约束,这里给出一个新函数

#' Greedily distribute orders over a fixed number of groups
#'
#' Orders are processed from largest to smallest. Each order goes into the
#' group whose sum, after adding the order, is closest to `num` without
#' exceeding it. If no group can take the order without exceeding `num`, it
#' goes into the group with the currently smallest sum.
#'
#' @param orders Numeric vector of order sizes (integer or double). `NA`s are
#'   dropped by the sort.
#' @param num Target sum per group.
#' @param group_num Number of groups to fill; must be a single number >= 1.
#' @return A list of length `group_num`. Each element holds the orders placed
#'   in that group, in the order they were added. Groups that received no
#'   order are zero-length vectors.
create_groups <- function(orders, num, group_num) {
  stopifnot(
    is.numeric(group_num),
    length(group_num) == 1L,
    !is.na(group_num),
    group_num >= 1
  )

  # `orders[0L]` is an empty vector of the same type as `orders`, so integer
  # input yields integer groups and double input yields double groups.
  groups <- rep(list(orders[0L]), group_num)

  # Running sums are kept as doubles: no truncation of fractional orders and
  # no integer overflow, and no need to re-sum every group on each iteration.
  sums <- numeric(group_num)

  for (k in sort(orders, decreasing = TRUE)) {
    fits <- which(sums + k <= num)
    if (length(fits) > 0L) {
      # Among the groups that can still take `k`, pick the one ending closest
      # to the target (ties resolve to the lowest group index).
      index <- fits[which.min(abs(sums[fits] + k - num))]
    } else {
      # Nothing fits: overflow the least-filled group.
      index <- which.min(sums)
    }
    groups[[index]] <- c(groups[[index]], k)
    sums[index] <- sums[index] + k
  }
  groups
}
#' Assign every order to one of at most `group_num` groups
#'
#' Calls `create_groups()` and maps its result back onto the original order
#' of `orders`.
#'
#' @param orders Numeric vector of order sizes.
#' @param num Target sum per group.
#' @param group_num Maximum number of groups.
#' @return A matrix with one row per element of `orders` and two columns:
#'   `orders` (the input values) and `group` (the group index, `NA` for `NA`
#'   orders).
allocate_groups <- function(orders, num, group_num) {
  groups <- create_groups(orders, num, group_num)

  values <- unlist(groups)
  labels <- rep(seq_along(groups), lengths(groups))

  # Ignore NA placeholders that may stand in for groups without any order.
  placed <- !is.na(values)
  values <- values[placed]
  labels <- labels[placed]

  # Pair the i-th smallest input order with the i-th smallest allocated value.
  # Unlike match(), which always returns the first hit, this gives every copy
  # of a duplicated order value its own group label.
  group <- rep(NA_integer_, length(orders))
  group[order(orders, na.last = NA)] <- labels[order(values)]

  cbind(orders, group = group)
}
# results above

新增的约束实际上使问题变得更简单:我们最多有 group_num 个"抽屉"(组)可以填充,并且每个抽屉的总和应尽可能接近 num。


函数

这是该函数的完整代码

# Heuristically group orders so that each group's sum is close to `num`.
#
# Arguments:
#   orders  vector of order sizes (the pair values are coerced with
#           as.integer() below, so integer input is assumed -- TODO confirm)
#   num     target sum per group
#
# Returns a matrix with columns `orders` (the input) and `group` (the group
# number assigned to each order; 0 means the loop never assigned one).
find_grouping <- function (orders, num) {
    # All pairs of orders; columns 1-2 are the pair members and column 3 is
    # their sum (added by constraintFun = 'sum').
    combs2 <- RcppAlgos::comboGeneral(orders, 2L, constraintFun = 'sum')
    # Column 4 (`close`): absolute distance of each pair sum from the target.
    combs2 <- cbind.data.frame(combs2,close=abs(num - combs2[,3]))
    out <- integer(length(orders))
    # Order values that already belong to a multi-order group; seeded with NA
    # so that c() below always has something to append to.
    skip <- NA_integer_
    group <- 1L
    for (k in seq_along(out)) {
      val1 <- orders[k]
      # Matching is by value, so an order equal to an already grouped value is
      # skipped as well.
      if (val1 %in% skip) next
      # Rows of combs2 in which the current order is one of the two members.
      ind1 <- (.subset2(combs2,1L) == val1) | (.subset2(combs2,2L) == val1)  
      # Among those rows, the one whose sum is closest to num ...
      ind2 <- (which.min(.subset2(combs2, 4L)[ind1]))
      # ... translated back to a row index of combs2.
      ind3 <- which(ind1)[ind2]
      val2 <- .subset2(combs2, 3L)[ind3]
      # The order on its own is at least as close to num as its best pair:
      # it forms a group by itself (and is not added to `skip`).
      if (abs(num-val1) <= abs(num-val2)) {
        out[k] <- group
        group  <- group + 1L
        next
      }
      # The two members of the best pair.
      intList <- as.integer(combs2[ind3,1:2])
      # Distinct order values outside the pair.
      # NOTE(review): values already recorded in `skip` are not removed here,
      # so an order that is already grouped can be picked again -- confirm
      # whether that is intended.
      ordersRemain <- setdiff(orders, intList)
      # Adding even the smallest remaining order would not bring the sum
      # closer to num: keep the pair as a group.
      if (abs(num-val2) <= abs(num-val2-min(ordersRemain))) {
        skip <- c(skip, intList)
        out[orders %in% intList] <- group
        group <- group + 1
        next
      }
      # Otherwise extend the pair; val3 is the running sum of the group.
      val3 <- val2
      cond <- FALSE
      while (!cond) {
        # NOTE(review): the candidate is chosen relative to val2 (the pair
        # sum), not to the running sum val3; the stop condition below also
        # uses val2 on its right-hand side. This looks unintended for groups
        # of more than three orders -- confirm.
        toAdd <- which.min(abs(num - (val2 + ordersRemain)))
        val3 <- val3 + ordersRemain[toAdd]
        intList <- c(intList, ordersRemain[toAdd])
        ordersRemain <- ordersRemain[-toAdd]
        # Stop once the running sum is at least as close to num as the pair
        # sum plus the smallest remaining order.
        cond <- abs(num-val3) <= abs(num-val2-min(ordersRemain))
      }
      # Record the finished group and mark its members as used.
      skip <- c(skip, intList)
      out[orders %in% intList] <- group
      group <- group + 1
    }
    cbind(orders,group=out)
}

说明

第一步是使用 RcppAlgos::comboGeneral(一种非常快速的方法)生成订单的所有两两组合(即每个组合包含 2 个订单)

# `num` is the target sum (300L in the calls shown earlier in this answer).
# All pairs of orders; the third column holds the sum of each pair.
combs <- RcppAlgos::comboGeneral(orders, 2L, constraintFun = 'sum')
combs <- cbind.data.frame(combs,close=abs(num - combs[,3])) # `close`: how far each pair sum is from num
#      1   2   3 close
# 1  100 198 298     2
# 2  100  50 150   150
# 3  100  40 140   160
# 4  100 215 315    15
# ...

接下来有几种做法。我选择使用循环:在每次迭代中,为当前值 orders[k] 找到最佳组合(即总和最接近 num 的组合),然后记下该组合(例如 100 和 198),并为该组合分配 group 值。

答案 1 :(得分:0)

这解决的是您所提问题的一个变体:其中每组的总和不得超过目标总和。

# BBmisc provides binPack(); dplyr provides mutate().
library(BBmisc)
library(dplyr)

# Upper bound for the sum of each bin. 305 rather than 300 so that
# 50 + 40 + 215 still fits into a single bin (compare the result for 300
# shown further below).
bin.capacity <- 305

# One row per order, then add the bin index computed by binPack() as `Group`.
df <- data.frame(Orders = c(100, 198, 50, 40, 215, 296))
df <- mutate(df, Group = BBmisc::binPack(Orders, bin.capacity))
> df
  Orders Group
1    100     3
2    198     3
3     50     2
4     40     2
5    215     2
6    296     1

对于bin.capacity = 300:

> df
  Orders Group
1    100     3
2    198     3
3     50     2
4     40     4
5    215     2
6    296     1