我有这种格式的数据:
-- Sample table: a single integer column holding the observations to bucket.
CREATE TABLE data (y int);

-- Four sample observations; the value 55555 appears twice.
INSERT INTO data (y)
VALUES (1),
       (55555),
       (55555),
       (99999);
我想创建一个直方图,以便大致了解我的数据的分布情况。我正在考虑将此格式作为输出:
lowerBoundary upperBoundary y
------------- ------------- -----------
0 9999 1
10000 19999 0
20000 29999 0
30000 39999 0
40000 49999 0
50000 59999 2
60000 69999 0
70000 79999 0
80000 89999 0
90000 99999 1
答案 0 :(得分:0)
您必须创建一个数字表,这样才能正确显示计数为 0 的行。
然后就可以计算每个“组”(区间)的下边界和上边界。
示例SQL:
-- Histogram of data.y in buckets of width 10000; empty buckets are listed with a count of 0.
WITH digits (n) AS (
    -- The ten decimal digits, reused for each position of the bucket index.
    SELECT v.n
    FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) AS v (n)
),
bucket_numbers (n) AS (
    -- Candidate bucket indexes 0..999. Make this big enough for your data.
    SELECT ones.n + 10 * tens.n + 100 * hundreds.n
    FROM digits AS ones
    CROSS JOIN digits AS tens
    CROSS JOIN digits AS hundreds
),
boundaries (lowerBoundary, upperBoundary) AS (
    -- Inclusive lower and upper bound of every candidate bucket.
    SELECT bn.n * 10000,
           (bn.n + 1) * 10000 - 1
    FROM bucket_numbers AS bn
)
SELECT b.lowerBoundary,
       b.upperBoundary,
       COUNT(d.y) AS y   -- COUNT(column) skips the NULLs produced by unmatched buckets
FROM boundaries AS b
-- Outer join so that buckets without any data row are kept.
LEFT JOIN data AS d
    ON d.y >= b.lowerBoundary
   AND d.y <= b.upperBoundary
-- Drop the trailing empty buckets that start above the largest value.
WHERE b.lowerBoundary <= (SELECT MAX(m.y) FROM data AS m)
GROUP BY b.lowerBoundary,
         b.upperBoundary
ORDER BY b.lowerBoundary;
答案 1 :(得分:0)
另一种选择......
我使用一个表值函数(TVF)来生成动态范围。由于它是单语句(内联)表值函数,速度非常快。此外,如果您不能使用UDF,这段逻辑也很容易移植到CTE或子查询中。
-- Histogram of Data.y over the half-open buckets [RetVal1, RetVal2) produced by the range function.
-- Every bucket comes from the function, so buckets without data still appear with y = 0.
Select A.RetVal1
      ,A.RetVal2          -- spelled exactly like the function's column so it also resolves under a case-sensitive collation
      ,y = count(B.y)     -- counts matched rows only; an unmatched bucket has B.y NULL and yields 0
 From  [dbo].[udf-Range-Number-Span](0,100000,10000) A
 Left  Join Data B
   on  B.y >= A.RetVal1
  and  B.y <  A.RetVal2
 Group By A.RetVal1, A.RetVal2
 Order By A.RetVal1       -- without an ORDER BY the row order of the result is not guaranteed
**返回**
RetVal1 RetVaL2 y
0.00 10000.00 1
10000.00 20000.00 0
20000.00 30000.00 0
30000.00 40000.00 0
40000.00 50000.00 0
50000.00 60000.00 2
60000.00 70000.00 0
70000.00 80000.00 0
80000.00 90000.00 0
90000.00 100000.00 1
UDF(如果需要)
-- Inline table-valued function that splits the range starting at @R1 into consecutive
-- spans of width @Incr.
--   @R1   : start of the first span
--   @R2   : end of the range; together with @R1 and @Incr it determines how many spans are produced
--   @Incr : width of each span
-- Returns one row per span: RetSeq (1-based sequence number), RetVal1 (span start)
-- and RetVal2 (span start + @Incr).
-- NOTE(review): the span count is (@R2-@R1)/@Incr converted from money to int, so a
-- range that is not an exact multiple of @Incr depends on that conversion - confirm before relying on it.
CREATE FUNCTION [dbo].[udf-Range-Number-Span] (@R1 money, @R2 money, @Incr money)
RETURNS TABLE
AS
RETURN (
    WITH span_count (M) AS (
        -- Number of spans requested.
        SELECT CAST((@R2 - @R1) / @Incr AS int)
    ),
    ten_rows (N) AS (
        -- Ten placeholder rows; cross joined eight times below for up to 10^8 rows.
        SELECT 1
        FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS v (N)
    ),
    seq (N) AS (
        -- Row numbers, limited to as many rows as there are spans.
        SELECT TOP (SELECT sc.M FROM span_count AS sc)
               ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
        FROM ten_rows AS a
        CROSS JOIN ten_rows AS b
        CROSS JOIN ten_rows AS c
        CROSS JOIN ten_rows AS d
        CROSS JOIN ten_rows AS e
        CROSS JOIN ten_rows AS f
        CROSS JOIN ten_rows AS g
        CROSS JOIN ten_rows AS h
    )
    -- The first span is emitted unconditionally.
    SELECT RetSeq  = 1,
           RetVal1 = @R1,
           RetVal2 = @R1 + @Incr
    UNION ALL
    -- Remaining spans: sequence N+1 starts N increments after @R1.
    SELECT seq.N + 1,
           (seq.N * @Incr) + @R1,
           ((seq.N * @Incr) + @R1) + @Incr
    FROM seq
    CROSS JOIN span_count
    WHERE seq.N < span_count.M
)
-- Max 100 million observations
-- Example: Select * from [dbo].[udf-Range-Number-Span](1,4,.5)