这确实是个难题。以前可能有人问过,但我没有找到任何相关内容,所以想把这个问题分享出来。
我正在尝试在一个应用程序中实现某种负载均衡,并已把问题简化为一个我认为应该很简单的 T-SQL 练习(该应用程序主要运行在 SQL Server 环境中,版本为 SQL Server 2008 R2)。
基本上,我有一个包含两个整数列的表:唯一且连续的 Id,以及不唯一的 Value。该表可以包含任意数量的记录,我想生成一个结果集,把最大的前 n 个值分别放入 n 个不同的“分组”,再把接下来的 n 个最大值同样分别放入这些“分组”,依此类推。
我在下面有一份初稿,但我相信它可以改进......
-- Deals the rows of @Test into @GroupCount groupings, largest Value first,
-- and lists every row together with the grouping it was assigned to.
DECLARE @GroupCount INT = 5

-- Set up the test data
DECLARE @Test TABLE (Id INT IDENTITY(1, 1), Value INT)
INSERT @Test (Value)
VALUES (100), (456), (121), (402), (253), (872), (765), (6529), (1029), (342), (98), (1), (0), (4), (46), (23), (456), (416), (2323), (4579)

-- Rank every row from the largest Value to the smallest.
-- Value is not unique (456 occurs twice), so Id is the tie-breaker that
-- keeps the numbering repeatable from one run to the next.
;WITH cte AS
(
    SELECT Id
          ,Value
          ,ROW_NUMBER() OVER (ORDER BY Value DESC, Id ASC) AS RowNum
    FROM @Test
)
-- Use modulus to split the ranking into rounds: rows sharing the same
-- RowNum % @GroupCount are numbered from the highest RowNum downwards, so
-- every Rnk value collects at most @GroupCount rows with adjacent ranks.
, cte2 AS
(
    SELECT Id
          ,Value
          ,ROW_NUMBER() OVER (PARTITION BY RowNum % @GroupCount ORDER BY RowNum DESC) AS Rnk
    FROM cte
)
-- Inside each round the largest Value goes to grouping 1, the next one to
-- grouping 2, and so on.
SELECT ROW_NUMBER() OVER (PARTITION BY Rnk ORDER BY Value DESC, Id ASC) AS [Grouping]
      ,Value
      ,Id
FROM cte2
ORDER BY [Grouping] ASC, Value ASC, Id ASC
这段代码可以运行,并生成以下数据集:
Grouping, Value, Id
======== ===== ==
1 46 15
1 342 10
1 765 7
1 6529 8
2 23 16
2 253 5
2 456 2
2 4579 20
3 4 14
3 121 3
3 456 17
3 2323 19
4 1 12
4 100 1
4 416 18
4 1029 9
5 0 13
5 98 11
5 402 4
5 872 6
返回的数据集是正确的:最大的前 n 个值被拆分到了不同的分组中,其余依此类推。但各分组的总和相差悬殊,例如分组 1 与分组 5 就完全不同。
按分组求和(SUM)之后,可以看到分布并不均匀:
Grouping, SummedValues
======== ============
1 7682
2 5311
3 2904
4 1546
5 1372
如何用尽可能少的代码行更好地平衡这些值,使每个分组的总和分布得更均匀?
答案 0 :(得分:1)
SQL Server 中的 NTILE 函数可以帮到你。
-- Sums Value per NTILE bucket: the rows are sorted by Value descending and
-- cut into @GroupCount consecutive tiles.
DECLARE @GroupCount INT = 5

-- Set up the test data
DECLARE @Test TABLE (Id INT IDENTITY(1, 1), Value INT)
INSERT @Test (Value)
VALUES (100), (456), (121), (402), (253), (872), (765), (6529), (1029), (342)
      ,(98), (1), (0), (4), (46), (23), (456), (416), (2323), (4579)

-- Tag every row with the number of the tile it falls into.
;WITH cte AS
(
    SELECT Value
          ,NTILE(@GroupCount) OVER (ORDER BY Value DESC) AS GroupNo
    FROM @Test
)
SELECT GroupNo
      ,SUM(Value) AS SummedValues
FROM cte
GROUP BY GroupNo
-- Without an explicit ORDER BY the order of the result rows is not guaranteed.
ORDER BY GroupNo ASC
我得到了这个结果。
GroupNo SummedValues
--------------------
1 14460
2 2549
3 1413
4 365
5 28
答案 1 :(得分:1)
稍微好一点的做法是按“蛇形”顺序来分配。你现在是把第 1、第 6、第 11 大的值放在同一组,其总和当然远高于第 5、第 10、第 15 大的值所在的那一组。
更好的做法是让一组拿到第 1、第 10、第 11 大的值,另一组拿到第 5、第 6、第 15 大的值。这仍不完美,对你这组特定数据来说效果依然很差,但比你原来的方案略好一些。
-- "Snake" distribution: inside every chunk of 2 * @GroupCount ranked rows the
-- groupings are handed out as @GroupCount .. 1 and then 1 .. @GroupCount, and
-- the Values are summed per grouping.
DECLARE @GroupCount INT = 5

-- Set up the test data
DECLARE @Test TABLE (Id INT IDENTITY(1, 1), Value INT)
INSERT @Test (Value)
VALUES (100), (456), (121), (402), (253), (872), (765), (6529), (1029), (342)
      ,(98), (1), (0), (4), (46), (23), (456), (416), (2323), (4579)

-- Order by Value descending (Id breaks ties so the ranking is repeatable)
;WITH cte AS
(
    SELECT Id
          ,Value
          ,ROW_NUMBER() OVER (ORDER BY Value DESC, Id ASC) AS RowNum
    FROM @Test
)
-- Use modulus to split the ranking into chunks of up to 2 * @GroupCount rows;
-- chunks are numbered starting from the smallest Values (highest RowNum).
, cte2 AS
(
    SELECT Id
          ,Value
          ,ROW_NUMBER() OVER (PARTITION BY RowNum % (@GroupCount * 2) ORDER BY RowNum DESC) AS Rnk
    FROM cte
)
-- Position of each row inside its chunk, largest Value first (1 .. 2 * @GroupCount).
, positioned AS
(
    SELECT Id
          ,Value
          ,ROW_NUMBER() OVER (PARTITION BY Rnk ORDER BY Value DESC, Id ASC) AS PosInChunk
    FROM cte2
)
-- Fold the position into a grouping. This is the integer form of
-- floor(abs(@GroupCount - (position - 0.5)) + 0.5): the first @GroupCount
-- positions count down from @GroupCount to 1, the remaining ones count back
-- up from 1. The key is an integer instead of a NUMERIC.
, assigned AS
(
    SELECT CASE
               WHEN PosInChunk <= @GroupCount THEN @GroupCount - PosInChunk + 1
               ELSE PosInChunk - @GroupCount
           END AS [Grouping]
          ,Value
          ,Id
    FROM positioned
)
SELECT [Grouping]
      ,SUM(Value) AS SummedValues
FROM assigned
GROUP BY [Grouping]
ORDER BY [Grouping] ASC
不过归根结底,我认为随机分配可能比这种方法更好——也许可以在随机分配的同时,检查某个分组的总和是否已经达到 2 ×(总和 ÷ 分组数)。
我觉得这不是 T-SQL 或任何 SQL 擅长解决的问题;能够逐行控制流程的语言会更适合你,例如 Python、C#、SAS,或者你工具箱里的其他工具。(我自己会考虑使用 PL/SQL……)
任何能让你在行级别上表达“跟踪目前已分配的内容,并把当前这一行分配给目前总量最小的桶”的工具,效果都会更好。
Grouping Summed Values
---------------------
1 1781
2 1608
3 2904
4 5249
5 7273
答案 2 :(得分:1)
将 ntile 和 row_number 窗口函数结合使用,不仅可以把数据拆分成大小均匀的组(是按行数均匀,而不是按总和均匀),还可以更好地决定每个组应包含哪些值,从而尽可能平衡各组的总和。
**答案:**
-- Assigns every row of @test to a grouping: the rows are cut into @ntile_cnt
-- tiles by descending value; inside tile 1 the grouping is the descending
-- rank, inside every other tile it is the ascending rank, so the largest
-- values of the first tile are paired with the smallest values of the others.
-- NOTE(review): @test and @ntile_cnt are not declared in this snippet and must
-- already exist in the batch. The totals posted with this answer are
-- reproduced with @ntile_cnt = 4 for the 20-row sample (row count divided by
-- the number of groupings) -- confirm the intended formula.
;with tiled as (
    select t.id
         , t.value
         , ntile(@ntile_cnt) over (order by t.value desc, t.id asc) as grp_split
    from @test as t
)
-- id is the tie-breaker in both rankings, so rows with equal values always
-- end up in the same grouping from one run to the next.
, ranked as (
    select a.id
         , a.value
         , a.grp_split
         , row_number() over (partition by a.grp_split order by a.value desc, a.id asc) as grp_split_rnk_desc
         , row_number() over (partition by a.grp_split order by a.value asc, a.id asc) as grp_split_rnk_asc
    from tiled as a
)
select case b.grp_split
           when 1 then b.grp_split_rnk_desc
           else b.grp_split_rnk_asc
       end as [grouping]
     , b.value
     , b.id
from ranked as b
order by [grouping] asc
       , b.value asc
       , b.id asc
**结果:**
不完美,但稍微接近。
Group Total
1 7029
2 5096
3 2904
4 1761
5 2025
答案 3 :(得分:1)
这个方法有缺陷,但就示例数据而言效果还不算太差。实际效果可能因数据而异。
-- Buckets rows by their cumulative share of the grand total (smallest values
-- first) and reports the sum of every bucket, largest sum first.
-- Requires windowed aggregates with ORDER BY (SQL Server 2012 or later).
declare @groupcount int = 5;

create table t (id int identity(1, 1), value int);

insert into t (value) values
    (100),(456),(121),(402),(253),(872),(765),(6529),(1029),(342)
  , (98),(1),(0),(4),(46),(23),(456),(416),(2323),(4579);

-- Number the rows from the smallest value to the largest; id breaks ties so
-- the numbering is repeatable.
;with cte as (
    select id
         , value
         , rn = row_number() over (order by value asc, id asc)
    from t
)
-- grp = truncated (running total / grand total * @groupcount) + 1.
-- value + .0 turns the grand total into a decimal so the division is not an
-- integer division. rn is unique, so the explicit ROWS frame yields the same
-- running total as the default frame.
-- NOTE: on the last row the running total equals the grand total, so that row
-- always gets grp = @groupcount + 1; the final query renumbers the buckets, so
-- the raw grp values are never shown.
, remaining as (
    select id
         , value
         , rn
         , grp = convert(int, (sum(value) over (order by rn rows unbounded preceding)
                               / sum(value + .0) over ()) * @groupcount) + 1
    from cte
)
-- "group by grp" groups on remaining.grp; the grp column that is returned is a
-- fresh 1..n numbering of the buckets by descending sum.
select
    grp = row_number() over (order by sum(value) desc)
  , sumValue = sum(value)
from remaining
group by grp
order by sum(value) desc;
rextester演示:http://rextester.com/UNV61100
结果:
+-----+----------+
| grp | sumValue |
+-----+----------+
| 1 | 6529 |
| 2 | 4579 |
| 3 | 3483 |
| 4 | 2323 |
| 5 | 1901 |
+-----+----------+
<hr/> SQL Server 2008 兼容版本:
-- SQL Server 2008 variant: buckets rows by their cumulative share of the grand
-- total (smallest values first) and reports the sum of every bucket, largest
-- sum first. The running total comes from a correlated subquery instead of a
-- windowed aggregate.
declare @groupcount int = 5;

create table t (id int identity(1, 1), value int);

insert into t (value) values
    (100),(456),(121),(402),(253),(872),(765),(6529),(1029),(342)
  , (98),(1),(0),(4),(46),(23),(456),(416),(2323),(4579);

-- Number the rows from the smallest value to the largest and attach the grand
-- total to every row. cte is referenced twice below, so id is used as a
-- tie-breaker to give both references the same numbering.
-- value + .0 makes TotalValue a decimal so the later division is not an
-- integer division.
;with cte as (
    select t.id
         , t.value
         , rn = row_number() over (order by t.value asc, t.id asc)
         , tv.TotalValue
    from t
    cross join (select TotalValue = sum(value + .0) from t) tv
)
-- grp = truncated (running total / grand total * @groupcount + 1).
-- NOTE: on the last row the running total equals the grand total, so that row
-- always gets grp = @groupcount + 1; the final query renumbers the buckets, so
-- the raw grp values are never shown.
, remaining as (
    select cte.id
         , cte.value
         , cte.rn
         , grp = convert(int, ((x.sumValueOver / cte.TotalValue) * @groupcount) + 1)
    from cte
    -- running total: sum of every row numbered at or before the current one
    outer apply (
        select sumValueOver = sum(i.value)
        from cte i
        where i.rn <= cte.rn
    ) x
)
-- "group by grp" groups on remaining.grp; the grp column that is returned is a
-- fresh 1..n numbering of the buckets by descending sum.
select
    grp = row_number() over (order by sum(value) desc)
  , sumValue = sum(value)
from remaining
group by grp
order by sum(value) desc;
rextester演示:http://rextester.com/DEUDJ77007
返回:
+-----+----------+
| grp | sumValue |
+-----+----------+
| 1 | 6529 |
| 2 | 4579 |
| 3 | 3483 |
| 4 | 2323 |
| 5 | 1901 |
+-----+----------+
答案 4 :(得分:0)
结果主要取决于最大的那一批值,因此可以尝试把其余各批按相反的顺序排序。
-- Sums Value per grouping: the first round of @GroupCount rows (the largest
-- Values) is dealt out in descending order, every later round in ascending order.
-- Expects @Test and @GroupCount to be declared earlier in the same batch.
-- The leading semicolon terminates the preceding statement, which a CTE requires.
;WITH cte AS
(
    SELECT Id
          ,Value
          ,ROW_NUMBER() OVER (ORDER BY Value DESC, Id ASC) AS RowNum
    FROM @Test
)
-- Use modulus to split the ranking into rounds; Rnk = 1 holds the rows with
-- the lowest RowNum, i.e. the largest Values.
, cte2 AS
(
    SELECT Id
          ,Value
          ,ROW_NUMBER() OVER (PARTITION BY RowNum % @GroupCount ORDER BY RowNum ASC) AS Rnk
    FROM cte
)
-- Negating Value reverses the sort for every round except the first one.
, cte3 AS
(
    SELECT ROW_NUMBER() OVER (PARTITION BY Rnk
                              ORDER BY CASE Rnk WHEN 1 THEN Value ELSE -Value END DESC, Id ASC) AS [Grouping]
          ,Value
          ,Id
    FROM cte2
)
-- The aggregate is left unaliased, so it is returned as "(No column name)".
SELECT [Grouping]
      ,SUM(Value)
FROM cte3
GROUP BY [Grouping]
ORDER BY [Grouping] ASC;
结果
Grouping (No column name)
1 1 7029
2 2 5096
3 3 2904
4 4 1761
5 5 2025