T-SQL | 字符串"操作"与聚合

时间:2014-10-03 16:28:16

标签: sql sql-server tsql sql-server-2012

我有以下情况。

源表 1

-- Source table 1: one row per Div/Dept; States holds a comma-separated
-- list of state codes with no space after the comma (NULL when none).
CREATE TABLE #Table1
(
    Div    varchar(10),
    Dept   varchar(10),
    States varchar(10)
);

INSERT INTO #Table1 (Div, Dept, States)
VALUES
    ('Div1', 'Dept1', 'CA,NV,TX'),
    ('Div2', 'Dept2', 'MI,OH,IN'),
    ('Div3', 'Dept2', 'NY,NJ,PA'),
    ('Div4', 'Dept1', NULL);

源表 2

-- Source table 2: several rows per Div/Dept; States holds a list of state
-- codes separated by comma + space.
CREATE TABLE #Table2
(
    Div    varchar(10),
    Dept   varchar(10),
    States varchar(10)
);

INSERT INTO #Table2 (Div, Dept, States)
VALUES
    ('Div1', 'Dept1', 'CA'),
    ('Div1', 'Dept1', 'NV, TX'),
    ('Div1', 'Dept1', 'TX, CA'),
    ('Div1', 'Dept1', 'CA, NV'),
    ('Div2', 'Dept2', 'MI, OH'),
    ('Div2', 'Dept2', 'MI, IN'),
    ('Div2', 'Dept2', 'OH'),
    ('Div3', 'Dept2', 'NY, NJ, PA');

期望输出

-- Desired output, materialised as a table: one row per #Table1 row, with each
-- state followed by its count in parentheses.
CREATE TABLE #Table3
(
    Div    varchar(10),
    Dept   varchar(10),
    States varchar(50)
);

INSERT INTO #Table3 (Div, Dept, States)
VALUES
    ('Div1', 'Dept1', 'CA - (3), NV - (2), TX - (2)'),
    ('Div2', 'Dept2', 'MI - (2), OH - (2), IN - (1)'),
    ('Div3', 'Dept2', 'NY - (1), NJ - (1), PA - (1)'),
    ('Div4', 'Dept1', NULL);

-- Show the two source tables and the desired-output table, then clean up.
SELECT Div, Dept, States FROM #Table1;
SELECT Div, Dept, States FROM #Table2;
SELECT Div, Dept, States FROM #Table3;

DROP TABLE #Table1, #Table2, #Table3;

SQLFIDDLE

目标:基于 #Table1 和 #Table2,按 Div 和 Dept 字段联接这两个表,然后统计 States 字段中每个不同的州出现的次数,并生成一个包含 Div、Dept、States 的输出,其中每个州的旁边显示该州的计数。

我不知道如何实现这一目标。我正在尝试LIKE,但无法弄清楚如何让它变得动态。我会继续试着看看能不能搞清楚。以为我会在这里发布这个问题,看看能否得到一些帮助。

谢谢

更新:

期望输出

Div     Dept    States
Div1    Dept1   CA - (3), NV - (2), TX - (2)
Div2    Dept2   MI - (2), OH - (2), IN - (1)
Div3    Dept2   NY - (1), NJ - (1), PA - (1)
Div4    Dept1   NULL

3 个答案:

答案 0 :(得分:6)

好的,首先,您需要拆分 #Table1 和 #Table2 中用分隔符连接的值。这样做的方法有很多种,我将使用 Aaron Bertrand 在一篇很棒的博客文章中描述的数字表方法。所以,我们需要一个数字表,可以这样创建:

-- Build a tally table #Numbers(Number) holding 1..8000. The cross join of
-- sys.all_objects with itself is used only as a row source for ROW_NUMBER().
SELECT seq.x AS Number
INTO #Numbers
FROM (
    SELECT ROW_NUMBER() OVER (ORDER BY a.[object_id]) AS x
    FROM sys.all_objects AS a
    CROSS JOIN sys.all_objects AS b
) AS seq
WHERE seq.x <= 8000;   -- ROW_NUMBER() starts at 1, so no lower bound is needed

然后,执行实际的拆分,再用分组字符串连接(group concatenation)的方法生成结果:

-- Splits the delimited States lists of #Table1 and #Table2 with the #Numbers
-- tally table, counts how often each #Table1 state occurs in #Table2 for the
-- same Div/Dept, and returns ONE row per Div/Dept with a string such as
-- 'CA - (3), NV - (2), TX - (2)'. States keep the order they have in #Table1.
;WITH T1 AS
(
    -- One row per state listed in #Table1 (delimiter ','). Pos is the character
    -- offset of the state inside the list and is used later to keep the list order.
    -- OUTER APPLY keeps rows whose States is NULL (Item and Pos are NULL then).
    SELECT T.Div, T.Dept, S.Item, S.Pos
    FROM #Table1 AS T
    OUTER APPLY (SELECT Item = SUBSTRING(T.States, Nums.Number,
                                         CHARINDEX(',', T.States + ',', Nums.Number) -
                                         Nums.Number),
                        Pos  = Nums.Number
                 FROM #Numbers AS Nums
                 WHERE Nums.Number <= LEN(T.States)
                 AND SUBSTRING(',' + T.States, Nums.Number, 1) = ',') AS S
), T2 AS
(
    -- One row per state listed in #Table2 (delimiter ', ').
    -- The delimiter length is written as the literal 2: LEN(', ') returns 1
    -- because LEN ignores trailing spaces, so it must not be used here.
    SELECT T.Div, T.Dept, S.Item
    FROM #Table2 AS T
    OUTER APPLY (SELECT Item = SUBSTRING(T.States, Nums.Number,
                                         CHARINDEX(', ', T.States + ', ', Nums.Number) -
                                         Nums.Number)
                 FROM #Numbers AS Nums
                 WHERE Nums.Number <= LEN(T.States)
                 AND SUBSTRING(', ' + T.States, Nums.Number, 2) = ', ') AS S
), T3 AS
(
    -- Occurrences of each #Table1 state in #Table2 for the same Div/Dept.
    -- COUNT(T2.Item) counts matches only, so a state that never occurs in
    -- #Table2 gets 0 (COUNT(*) would report 1 for the unmatched outer row).
    SELECT T1.Div, T1.Dept, T1.Item,
           Pos = MIN(T1.Pos),
           Cnt = COUNT(T2.Item)
    FROM T1
    LEFT JOIN T2
        ON T1.Div = T2.Div
        AND T1.Dept = T2.Dept
        AND T1.Item = T2.Item
    GROUP BY T1.Div, T1.Dept, T1.Item
)
SELECT  A.Div,
        A.Dept,
        -- Each piece is prefixed with ', '; STUFF removes the first two characters.
        -- A NULL Item makes the whole piece NULL, so a Div/Dept without states yields NULL.
        States = STUFF((SELECT  ', ' + CONVERT(VARCHAR(20), B.Item) +
                                ' - (' + CAST(B.Cnt AS VARCHAR(10)) + ')'
                        FROM T3 AS B
                        WHERE B.Div = A.Div
                        AND B.Dept = A.Dept
                        ORDER BY B.Pos
                    FOR XML PATH(''), TYPE).value('.[1]','nvarchar(max)'),1,2,'')
FROM T3 AS A
-- T3 has one row per state; grouping collapses it to one row per Div/Dept.
-- Without it every Div/Dept is repeated once per state.
GROUP BY A.Div, A.Dept
ORDER BY A.Div, A.Dept;

结果是:

╔══════╦═══════╦════════════════════════════╗
║ Div  ║ Dept  ║           States           ║
╠══════╬═══════╬════════════════════════════╣
║ Div1 ║ Dept1 ║ CA - (3),NV - (2),TX - (2) ║
║ Div1 ║ Dept1 ║ CA - (3),NV - (2),TX - (2) ║
║ Div1 ║ Dept1 ║ CA - (3),NV - (2),TX - (2) ║
║ Div2 ║ Dept2 ║ IN - (1),MI - (2),OH - (2) ║
║ Div2 ║ Dept2 ║ IN - (1),MI - (2),OH - (2) ║
║ Div2 ║ Dept2 ║ IN - (1),MI - (2),OH - (2) ║
║ Div3 ║ Dept2 ║ NJ - (1),NY - (1),PA - (1) ║
║ Div3 ║ Dept2 ║ NJ - (1),NY - (1),PA - (1) ║
║ Div3 ║ Dept2 ║ NJ - (1),NY - (1),PA - (1) ║
║ Div4 ║ Dept1 ║ NULL                       ║
╚══════╩═══════╩════════════════════════════╝

答案 1 :(得分:6)

您的需求相当棘手,但作为开发人员,我们只能利用手头现有的东西。下面是一个大量使用公用表表达式(CTE)的解决方案:

-- Counts how often each state occurs in Table2 per Div/Dept and returns one row
-- per Table1 row with a string such as 'CA - (3), NV - (2), TX - (2)'.
-- States are listed alphabetically within each row.
-- NOTE(review): this query reads Table1/Table2 exactly as the answer wrote them;
-- the question's setup script creates #Table1/#Table2, so adjust the names when
-- running against those temp tables.
;WITH
    CTE1 AS
    (
        -- Normalise the list: drop spaces and append a terminating comma so
        -- that every state, including the last one, is followed by ','.
        SELECT      Div, Dept,
                    REPLACE(States,' ','') + ',' AS States
        FROM        Table2
    ),
    CTE2 AS
    (
        -- Anchor: peel the first state off the list.
        SELECT      c1.Div, c1.Dept,
                    LEFT(c1.States,CHARINDEX(',', c1.States)-1)                 AS IndividualState,
                    RIGHT(c1.States,LEN(c1.States)-CHARINDEX(',', c1.States))   AS RemainingStates
        FROM        CTE1    c1
        UNION ALL
        -- Recursive step: keep peeling until nothing remains.
        SELECT      c2.Div, c2.Dept,
                    LEFT(c2.RemainingStates,CHARINDEX(',', c2.RemainingStates)-1),
                    RIGHT(c2.RemainingStates,LEN(c2.RemainingStates) - CHARINDEX(',', c2.RemainingStates))
        FROM        CTE2    c2
        WHERE       LEN(c2.RemainingStates) > 0
    ),
    CTE3 AS
    (
        -- Occurrences of each state per Div/Dept.
        SELECT      Div, Dept,
                    IndividualState,
                    COUNT(*)            AS StateCount
        FROM        CTE2
        GROUP BY    Div, Dept, IndividualState
    ),
    CTE4 AS
    (
        -- Assemble 'XX - (n), ' pieces into one string per Table1 row.
        -- ORDER BY makes the order of the pieces deterministic; TYPE + .value()
        -- returns the text without XML entity encoding (e.g. '&' as '&amp;').
        -- A Div/Dept with no rows in CTE3 yields NULL.
        SELECT      t1.Div, t1.Dept,
                    (
                        SELECT  c3.IndividualState + ' - (' + CONVERT(varchar(10),c3.StateCount) + '), '
                        FROM    CTE3 c3
                        WHERE   c3.Div = t1.Div AND c3.Dept = t1.Dept
                        ORDER BY c3.IndividualState
                        FOR XML PATH(''), TYPE
                    ).value('.', 'nvarchar(max)')       AS States
        FROM        Table1  t1
    )

-- Remove the trailing ', '. LEN ignores the trailing space, so LEN(States) is
-- the position of the final comma and LEN(States) - 1 drops comma and space.
SELECT  Div, Dept,
        LEFT(States, LEN(States) - 1) AS States
FROM    CTE4
ORDER BY Div, Dept;

解释

  1. CTE1清除Table2中的数据:删除空格,在末尾添加逗号
  2. CTE2进行规范化
  3. CTE3进行计数
  4. CTE4执行最终的组装,将CA | 3转换为CA - (3), ...
  5. 最后的SELECT删除尾随逗号,使输出更整洁。

    为了更好地理解每个步骤,您可以将最终的SELECT替换为 SELECT * FROM CTE1、SELECT * FROM CTE2 等。

答案 2 :(得分:2)

理想情况下,这些数据当然应该被规范化,因为现在这样处理起来很麻烦。鉴于您只能使用这种结构,我认为您最好的选择是使用一张州的驱动表(driver table),或者使用现有的众多拆分函数之一来拆分分隔值,从而让每个州各占一行:

-- Uses a driver table of state abbreviations to count, per Div/Dept, how often
-- each state listed in #Table1 occurs in #Table2, and returns one row per
-- Div/Dept with a string such as 'CA-(3),NV-(2),TX-(2)' (alphabetical order).
-- NOTE(review): #States is not created in this script; the query assumes it has
-- one row per state abbreviation in a character column named abbrs — confirm.
;WITH cte AS (
    -- Distinct Div/Dept/state list from #Table1. Wrapping the space-stripped
    -- list in commas lets LIKE match whole abbreviations only. The LEFT JOIN
    -- keeps #Table1 rows that match no state (e.g. NULL States), with abbrs NULL.
    SELECT DISTINCT t1.Div, t1.Dept, s.abbrs
    FROM #Table1 AS t1
    LEFT JOIN #States AS s
        ON ',' + REPLACE(t1.States, ' ', '') + ',' LIKE '%,' + s.abbrs + ',%'
)
, cte2 AS (
    -- Every Div/Dept/state occurrence in #Table2 (duplicates kept for counting).
    SELECT t2.Div, t2.Dept, s.abbrs
    FROM #States AS s
    INNER JOIN #Table2 AS t2
        ON ',' + REPLACE(t2.States, ' ', '') + ',' LIKE '%,' + s.abbrs + ',%'
)
, cte3 AS (
    -- Occurrence count per Div/Dept/state, cast to text for concatenation.
    -- COUNT(b.abbrs) yields 0 when the state never occurs in #Table2.
    SELECT a.Div, a.Dept, a.abbrs, CAST(COUNT(b.abbrs) AS VARCHAR(25)) AS CT
    FROM cte AS a
    LEFT JOIN cte2 AS b
        ON a.Dept = b.Dept
        AND a.Div = b.Div
        AND a.abbrs = b.abbrs
    GROUP BY a.Div, a.Dept, a.abbrs
)
SELECT
    a.Div,
    a.Dept,
    -- cte3 is already unique per Div/Dept/abbrs, so no DISTINCT is needed here;
    -- ORDER BY fixes the order of the pieces. STUFF removes the leading ','.
    -- A NULL abbrs makes the piece NULL, so such a Div/Dept yields NULL.
    States = STUFF((SELECT ',' + b.abbrs + '-(' + b.CT + ')'
                    FROM cte3 AS b
                    WHERE a.Div = b.Div
                      AND a.Dept = b.Dept
                    ORDER BY b.abbrs
                    FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)')
                   , 1, 1, '')
FROM cte3 AS a
GROUP BY a.Div, a.Dept
ORDER BY a.Div, a.Dept;

演示:SQL Fiddle

注意:

  • cte - 从 table1 创建去重后的 div / dept / state 列表
  • cte2 - 从 table2 创建所有 div / dept / state 的列表
  • cte3 - 在div / dept / state上聚合以获取计数

输出是:

|  DIV |  DEPT |               STATES |
|------|-------|----------------------|
| Div1 | Dept1 | CA-(3),NV-(2),TX-(2) |
| Div2 | Dept2 | IN-(1),MI-(2),OH-(2) |
| Div3 | Dept2 | NJ-(1),NY-(1),PA-(1) |
| Div4 | Dept1 |               (null) |

已更新 SQL Fiddle,加入了您的 NULL 行,并补充了输出结果。