使用 SQL Server,我需要针对每一行统计其各列中取值分别为 -1、0、1、2 的列数。每个表的列数各不相同(其中一个表有 55 个字段)。
-1 value = no answer or unknown (unknown_count)
0 value = ok (ok_count)
1 value = bad (bad_count)
2 value = not applicable (na_count)
第1行看起来像这样
rowid name field_1 field_2 field_3 field_4
1 line_1 -1 1 2 1
2 line_2 2 1 -1 0
等...
我希望看到结果
rowid na_count ok_count bad_count unknown_count
1 1 0 2 1
2 1 1 1 1
附加问题:我还需要统计参与计数的字段总数(这样我就可以提供百分比报告)。
答案 0 :(得分:2)
您需要将 UNPIVOT 与聚合结合起来使用。先把数据逆透视(unpivot)成更合理的形式,然后按 rowid 分组,对各个取值分别求和:
-- Per-row tally of how many of the field_N columns hold each status code.
-- Status codes: -1 = unknown / no answer, 0 = ok, 1 = bad, 2 = not applicable.
-- Note: UNPIVOT drops NULL cells, so a NULL field is not counted in any bucket.
WITH data AS
(
    -- Reshape each (rowid, field_N) cell into its own (rowid, field, value) row.
    SELECT
        rowid,
        field,
        value
    FROM
        (SELECT rowid, field_1, field_2, field_3, field_4
         FROM MyTable) AS p
    UNPIVOT
        (value FOR field IN (field_1, field_2, field_3, field_4)) AS unpvt
)
SELECT
    rowid,
    SUM(CASE WHEN value = 2 THEN 1 ELSE 0 END) AS na_count,
    SUM(CASE WHEN value = 0 THEN 1 ELSE 0 END) AS ok_count,
    SUM(CASE WHEN value = 1 THEN 1 ELSE 0 END) AS bad_count,
    SUM(CASE WHEN value = -1 THEN 1 ELSE 0 END) AS unknown_count
FROM data
-- Same casing as the column definition: on a case-sensitive collation
-- "rowId" would not resolve to "rowid".
GROUP BY rowid;
答案 1 :(得分:0)
以Jamiec的答案和我之前使用的一些代码为基础;如果您不想在查询中键入所有列名,可以使用动态SQL来构建查询,如下所示:
-- Builds and runs an UNPIVOT query that counts, per rowid, how many value
-- columns hold each status code (2 = n/a, 0 = ok, 1 = bad, -1 = unknown),
-- plus the number of (non-NULL) value columns that took part in the count.
-- The column list is read from the catalog, so it does not have to be typed.
declare @schema sysname
set @schema = N'dbo'        -- change to your table's schema
declare @tab nvarchar(max)
set @tab = N'your_table'    -- change to your table name

-- Comma-separated, bracket-quoted list of the value columns in column order.
-- FOR XML PATH gives a deterministic ORDER BY, unlike "select @v = @v + ...",
-- and TYPE/.value() keeps characters such as & and < in names intact.
declare @cols nvarchar(max)
select @cols = stuff((
        select N',' + quotename(c.name)
        from sys.columns as c
        inner join sys.tables as t on t.object_id = c.object_id
        inner join sys.schemas as s on s.schema_id = t.schema_id
        where s.name = @schema
          and t.name = @tab
          and c.name not in (N'rowid', N'name') -- exclude the columns that don't hold data values
        order by c.column_id
        for xml path(''), type
    ).value('.', 'nvarchar(max)'), 1, 1, N'')

-- Without this guard a missing table / empty column list makes @sql NULL and
-- the script would silently do nothing.
if @cols is null
begin
    raiserror(N'No value columns found; check @schema and @tab.', 16, 1)
    return
end

-- Identifiers cannot be passed as parameters, so they are embedded via
-- quotename() to keep the generated statement well-formed and injection-safe.
declare @sql nvarchar(max)
select @sql = N'
select
    rowid,
    sum(case when val = 2 then 1 else 0 end) as na_count,
    sum(case when val = 0 then 1 else 0 end) as ok_count,
    sum(case when val = 1 then 1 else 0 end) as bad_count,
    sum(case when val = -1 then 1 else 0 end) as unknown_count,
    count(*) as column_count
from (select rowid, ' + @cols + N' from ' + quotename(@schema) + N'.' + quotename(@tab) + N') as src
unpivot (val for col in (' + @cols + N')) as unpvt
group by rowid'

exec sp_executesql @sql
答案 2 :(得分:0)
我认为在动态脚本中直接对所有列求和会比使用 unpivot 更高效。在一百万行数据上,执行计划中两者相对于整个批处理的开销分别为 16%(此代码生成的 @query)和 84%(unpivot 代码)。
您可以使用相同的逻辑来获取每列的百分比。如果您希望我也提供这部分代码,请告诉我。
-- Scratch table used to demonstrate the per-row status counts.
-- DROP TABLE TMP_Test   -- uncomment to reset before re-running
CREATE TABLE TMP_Test
(
    rowid   INT IDENTITY(1,1) PRIMARY KEY,
    name    VARCHAR(10),
    field_1 INT,
    field_2 INT,
    field_3 INT,
    field_4 INT
);

-- The two sample rows from the question; rowid is generated by IDENTITY.
INSERT INTO TMP_Test (name, field_1, field_2, field_3, field_4)
VALUES
    ('line_1', -1, 1,  2, 1),
    ('line_2',  2, 1, -1, 0);

/*
Optional load generator for timing comparisons: keeps doubling the table
until it holds at least one million rows.

WHILE ((SELECT COUNT(*) FROM TMP_Test) < 1000000)
BEGIN
    INSERT INTO TMP_Test (name, field_1, field_2, field_3, field_4)
    SELECT name, field_1, field_2, field_3, field_4 FROM TMP_Test
END
*/
GO
-- Builds and runs a query that, for every row of @schema.@table, adds up one
-- IIF(...) term per value column to count how many columns hold each status
-- code (2 = n/a, 0 = ok, 1 = bad, -1 = unknown). No UNPIVOT is involved: each
-- count is a plain row-level expression such as
--   IIF([field_1] = 2, 1, 0) + IIF([field_2] = 2, 1, 0) + ...
-- IIF requires SQL Server 2012 or later.
DECLARE @query NVARCHAR(MAX);
DECLARE @schema SYSNAME = N'dbo';
DECLARE @table SYSNAME = N'TMP_Test';
DECLARE @na_count NVARCHAR(MAX);
DECLARE @ok_count NVARCHAR(MAX);
DECLARE @bad_count NVARCHAR(MAX);
DECLARE @unknown_count NVARCHAR(MAX);

-- Value columns (everything except the key and label columns), already
-- bracket-quoted so names with spaces or reserved words stay valid.
DECLARE @value_columns TABLE
(
    ordinal_position INT NOT NULL PRIMARY KEY,
    quoted_name NVARCHAR(258) NOT NULL
);

INSERT INTO @value_columns (ordinal_position, quoted_name)
SELECT
    ORDINAL_POSITION,
    QUOTENAME(COLUMN_NAME)
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = @schema
  AND TABLE_NAME = @table
  AND COLUMN_NAME NOT IN ('rowid', 'name');

-- With no value columns the generated SELECT would be syntactically invalid
-- ("na_count = " followed by nothing), so fail with a clear message instead.
IF NOT EXISTS (SELECT 1 FROM @value_columns)
BEGIN
    RAISERROR(N'No value columns found for %s.%s.', 16, 1, @schema, @table);
    RETURN;
END;

-- One " + "-joined expression per status code. FOR XML PATH gives a
-- deterministic ORDER BY (unlike "SELECT @v = @v + ..."), TYPE/.value()
-- avoids XML entity escaping, and STUFF strips the leading ' + '.
SET @na_count = STUFF((
        SELECT N' + IIF(' + quoted_name + N' = 2, 1, 0)'
        FROM @value_columns
        ORDER BY ordinal_position
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 3, N'');

SET @ok_count = STUFF((
        SELECT N' + IIF(' + quoted_name + N' = 0, 1, 0)'
        FROM @value_columns
        ORDER BY ordinal_position
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 3, N'');

SET @bad_count = STUFF((
        SELECT N' + IIF(' + quoted_name + N' = 1, 1, 0)'
        FROM @value_columns
        ORDER BY ordinal_position
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 3, N'');

SET @unknown_count = STUFF((
        SELECT N' + IIF(' + quoted_name + N' = -1, 1, 0)'
        FROM @value_columns
        ORDER BY ordinal_position
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 3, N'');

SET @query = N'
SELECT
    rowid
    , na_count = ' + @na_count + N'
    , ok_count = ' + @ok_count + N'
    , bad_count = ' + @bad_count + N'
    , unknown_count = ' + @unknown_count + N'
FROM ' + QUOTENAME(@schema) + N'.' + QUOTENAME(@table);

-- Debug aid only: PRINT truncates long strings, so wide tables may show a
-- cut-off statement here even though the full text is executed below.
PRINT(@query);
EXEC sp_executesql @query;