将包含空格的文本分隔为单独的列

时间:2018-04-18 09:39:11

标签: sql sql-server string tsql split

我有一个看起来像这样的Textcol:

文字示例:

Note:test note
Phone Call: Fairview Wines & Spirits
Number Called: 1 604  601-8000
Phone Description: Main
Subject: Arrange meeting
Result: Arranged interview
Duration: 00:00:09

如果可能的话,我希望将主题、结果和注释分成四个单独的列。我尝试了两种方法:

第一个例子:

-- Attempt 1: cut each field out of TextCol at hard-coded character positions.
-- The start/length pairs (1/25, 110/25, 135/33) are constants, so the cuts only
-- line up with the labels when a row's text has the same layout as the sample.
SELECT "Note"=SUBSTRING(TextCol,1,25),"Subject"=SUBSTRING(TextCol,110,25)
 ,"Result"=SUBSTRING(TextCol,135,33) FROM AMGR_Notes where type = 2

第二个例子:

 -- Attempt 2: locate each label with CHARINDEX and take the text between two
 -- consecutive labels, stripping CR/LF and surrounding spaces.
 -- NOTE(review): CHARINDEX returns 0 when a label is absent, and unlike attempt 1
 -- this query has no "type = 2" filter. For a row that lacks a label (or has the
 -- labels in a different order) the computed SUBSTRING length goes negative,
 -- which is the likely cause of the reported Msg 537.
 SELECT
    RTRIM(LTRIM(REPLACE(REPLACE(SUBSTRING(T.TextCol, A.SubjectStart + 8, A.ResultStart - A.SubjectStart - 8), CHAR(10), ''), CHAR(13), ''))) AS [Subject]
    ,RTRIM(LTRIM(REPLACE(REPLACE(SUBSTRING(T.TextCol, A.ResultStart + 7, A.DurationStart - A.ResultStart - 7), CHAR(10), ''), CHAR(13), ''))) AS Result
    ,RTRIM(LTRIM(REPLACE(REPLACE(SUBSTRING(T.TextCol, 6, A.PhoneStart - 6), CHAR(10), ''), CHAR(13), ''))) AS Note
FROM AMGR_Notes T
    CROSS APPLY
    (
        -- 1-based start position of each label inside TextCol (0 = not found)
        VALUES
        (
            CHARINDEX('Phone Call:', T.TextCol)
            ,CHARINDEX('Subject:', T.TextCol)
            ,CHARINDEX('Result:', T.TextCol)
            ,CHARINDEX('Duration:', T.TextCol)

        )
    ) A (PhoneStart, SubjectStart, ResultStart, DurationStart);

第二个例子给出了一个错误:

  

消息 537,级别 16,状态 3,第 1 行:传递给 LEFT 或 SUBSTRING 函数的长度参数无效。

我在 SQL 方面算不上高手(genius),所以看不懂这个错误。我也想知道在这种情况下使用 CASE 语句是否会更合适。

非常感谢任何帮助

4 个答案:

答案 0 :(得分:1)

使用如下的 CASE 语句解决了这个问题:

-- Splits the labelled note text in AMGR_Notes.TextCol into Subject, Result,
-- Duration and Note columns for rows of type '2'.
-- Each CASE guard makes sure the SUBSTRING length can never be negative (which
-- raises Msg 537); when a field cannot be located the column is returned as ''.
SELECT
    CASE
        -- "Subject:" (8 chars) must be present and followed by "Result:"
        WHEN A.SubjectStart > 0 AND A.ResultStart - A.SubjectStart >= 8
        THEN RTRIM(LTRIM(REPLACE(REPLACE(SUBSTRING(T.TextCol, A.SubjectStart + 8, A.ResultStart - A.SubjectStart - 8), CHAR(10), ''), CHAR(13), '')))
        ELSE ''
    END AS [Subject],
    CASE
        -- "Result:" (7 chars) must be present and followed by "Duration:"
        WHEN A.ResultStart > 0 AND A.DurationStart - A.ResultStart >= 7
        THEN RTRIM(LTRIM(REPLACE(REPLACE(SUBSTRING(T.TextCol, A.ResultStart + 7, A.DurationStart - A.ResultStart - 7), CHAR(10), ''), CHAR(13), '')))
        ELSE ''
    END AS Result,
    CASE
        -- "Duration:" (9 chars) is the last label in the sample text, so its value
        -- is everything after the label. LEN(T.TextCol) is only an upper bound:
        -- SUBSTRING stops at the end of the string. The length must not be derived
        -- from the Result position, which truncates the value or goes negative.
        WHEN A.DurationStart > 0
        THEN RTRIM(LTRIM(REPLACE(REPLACE(SUBSTRING(T.TextCol, A.DurationStart + 9, LEN(T.TextCol)), CHAR(10), ''), CHAR(13), '')))
        ELSE ''
    END AS Duration,
    CASE
        -- The note is the text between the leading "Note:" (5 chars) and "Phone Call:"
        WHEN A.PhoneStart >= 6
        THEN RTRIM(LTRIM(REPLACE(REPLACE(SUBSTRING(T.TextCol, 6, A.PhoneStart - 6), CHAR(10), ''), CHAR(13), '')))
        ELSE ''
    END AS Note
FROM AMGR_Notes AS T
CROSS APPLY (
    -- 1-based start position of each label inside TextCol (0 = label not found)
    VALUES
    (
        CHARINDEX('Phone Call:', T.TextCol)
        ,CHARINDEX('Subject:', T.TextCol)
        ,CHARINDEX('Result:', T.TextCol)
        ,CHARINDEX('Duration:', T.TextCol)
    )
) AS A (PhoneStart, SubjectStart, ResultStart, DurationStart)
-- Alias is referenced as T (as declared) so the query also works under a
-- case-sensitive collation.
WHERE T.type = '2';

答案 1 :(得分:1)

我认为只需使用 CROSS APPLY 就能以非常简洁且高效的方式实现。下面是一段用于处理单个字符串的动态内联函数逻辑(请原谅我的命名——我写得比较匆忙):

-- Demo: extract the value that follows each label from one multi-line string
-- and return the four values as columns of a single row.
DECLARE @string varchar(8000) = 
'Phone Call: Fairview Wines & Spirits
Number Called: 1 604  601-8000
Phone Description: Main
Subject: Arrange meeting
Result: Arranged interview
Duration: 00:00:09';

SELECT  
  -- MAX(CASE ...) folds the four per-label rows back into one row (manual pivot)
  [Phone Call] = MAX(CASE s.n WHEN 1 THEN here.yougo END),
  [Subject]    = MAX(CASE s.n WHEN 2 THEN here.yougo END),
  [Result]     = MAX(CASE s.n WHEN 3 THEN here.yougo END),
  [Duration]   = MAX(CASE s.n WHEN 4 THEN here.yougo END)
FROM (VALUES (@string)) t(s) -- target.string
CROSS APPLY (VALUES (1,'Phone Call:'),(2,'Subject:'),(3,'Result:'),(4,'Duration:')) s(n,t) -- search.text
-- start.location, start.length; NULLIF turns "label not found" (0) into NULL so
-- the column comes back NULL instead of text cut from the start of the string
CROSS APPLY (VALUES (NULLIF(CHARINDEX(s.t, t.s), 0), LEN(s.t))) st(l,ln)
-- end of line: search from the first character after the label (not one past it)
-- so an empty value does not swallow the next line; 8001 = one past the longest
-- possible varchar(8000), used when the label sits on the last line
CROSS APPLY (VALUES (ISNULL(NULLIF(CHARINDEX(CHAR(10), t.s, st.l+st.ln),0),8001))) e(ln)
-- cut the value, drop a trailing CR from CRLF line ends, and trim the padding
CROSS APPLY (VALUES (LTRIM(RTRIM(REPLACE(SUBSTRING(t.s, st.l+st.ln, e.ln-(st.l+st.ln)), CHAR(13), ''))))) here(yougo);

返回:

Phone Call                 Subject          Result              Duration 
-------------------------- ---------------- ------------------- ---------
Fairview Wines & Spirits   Arrange meeting  Arranged interview  00:00:09

执行计划非常棒 - 总的子树成本为0.0000071(在我的电脑上)。要将此逻辑应用于表,您可以执行此操作:

-- Demo: the same label extraction applied to every row of a table, returning
-- one output row per someid.
DECLARE @AMGR_Notes TABLE (someid int identity primary key, somestring varchar(8000));
-- Explicit column list: someid is an identity column and is generated automatically
INSERT @AMGR_Notes (somestring) VALUES 
('Phone Call: ACME Treats
Number Called: 1 604 555-9988
Phone Description: Old School Landline
Subject: Buy Ice Cream
Result: He sold me some
Duration: 00:00:01'),
('Phone Call: Fairview Wines & Spirits
Number Called: 1 604  601-8000
Phone Description: Main
Subject: Arrange meeting
Result: Arranged interview
Duration: 00:00:09');

SELECT 
  t.someid,
  -- MAX(CASE ...) folds the four per-label rows of each someid into one row
  [Phone Call] = MAX(CASE s.n WHEN 1 THEN here.yougo END),
  [Subject]    = MAX(CASE s.n WHEN 2 THEN here.yougo END),
  [Result]     = MAX(CASE s.n WHEN 3 THEN here.yougo END),
  [Duration]   = MAX(CASE s.n WHEN 4 THEN here.yougo END)
FROM @AMGR_Notes tt
CROSS APPLY (VALUES (tt.someid,tt.somestring)) t(someid,s) -- target.string
CROSS APPLY (VALUES (1,'Phone Call:'),(2,'Subject:'),(3,'Result:'),(4,'Duration:')) s(n,t) -- search.text
-- start.location, start.length; NULLIF turns "label not found" (0) into NULL so
-- the column comes back NULL instead of text cut from the start of the string
CROSS APPLY (VALUES (NULLIF(CHARINDEX(s.t, t.s), 0), LEN(s.t))) st(l,ln)
-- end of line: search from the first character after the label (not one past it)
-- so an empty value does not swallow the next line; 8001 = one past the longest
-- possible varchar(8000), used when the label sits on the last line
CROSS APPLY (VALUES (ISNULL(NULLIF(CHARINDEX(CHAR(10), t.s, st.l+st.ln),0),8001))) e(ln)
-- cut the value, drop a trailing CR from CRLF line ends, and trim the padding
CROSS APPLY (VALUES (LTRIM(RTRIM(REPLACE(SUBSTRING(t.s, st.l+st.ln, e.ln-(st.l+st.ln)), CHAR(13), ''))))) here(yougo)
GROUP BY t.someid;

MAX-CASE 逻辑是实现透视(pivot)的另一种方式,但在我看来更简洁、更容易(感谢 Jeff Moden 教我这个技巧)。这里的性能关键(使用 PIVOT 时同样如此)是确保要分组的列上有索引,在我的示例中是 someid 列。

答案 2 :(得分:0)

试试这样:

-- Demo: split the note text into one row per line via XML, then split each
-- line at its first colon into a (name, value) pair.
DECLARE @tbl TABLE(ID INT IDENTITY, TextCol VARCHAR(500));
-- Explicit column list: ID is an identity column and is generated automatically
INSERT INTO @tbl (TextCol) VALUES
 ('Phone Call: Fairview Wines & Spirits
    Number Called: 1 604  601-8000
    Phone Description: Main
    Subject: Arrange meeting
    Result: Arranged interview
    Duration: 00:00:09
');

WITH RowWise AS
(
    -- FOR XML PATH('') escapes characters such as & so the text can be cast to
    -- XML; CRs are removed and every LF becomes an element boundary </x><x>
    SELECT CAST('<x>' + REPLACE((SELECT REPLACE(TextCol,CHAR(13),'') AS [*] FOR XML PATH('')),CHAR(10),'</x><x>')  + '</x>' AS XML) AS OneRow
    FROM @tbl 
)
,EachRow AS
(
    -- One row per non-empty <x> element, i.e. per non-empty source line
    SELECT LTRIM(RTRIM(r.value('text()[1]','nvarchar(max)'))) AS RowText
    FROM RowWise
    CROSS APPLY OneRow.nodes('/x[text()]') AS A(r)
)
SELECT LEFT(RowText,NULLIF(CHARINDEX(':',RowText),0)-1) AS ColumnName
      ,LTRIM(SUBSTRING(RowText,CHARINDEX(':',RowText)+1,1000)) AS ColumnValue
FROM EachRow
-- Lines without a colon are skipped. The NULLIF above additionally keeps LEFT
-- from receiving -1 (Msg 537) should the expression be evaluated before this filter.
WHERE CHARINDEX(':',RowText) > 0;

结果

Phone Call           Fairview Wines & Spirits
Number Called        1 604  601-8000
Phone Description    Main
Subject              Arrange meeting
Result               Arranged interview
Duration             00:00:09

更新

添加PIVOT以便在一行中获取

-- Pivots the per-line (name, value) pairs into one row per note.
-- Must run in the same batch as the DECLARE @tbl shown earlier.
WITH PlainLineBreak AS
(
    -- Normalise CR to LF; the resulting empty lines are dropped by the
    -- /x[text()] filter further down
    SELECT ID
          ,REPLACE(TextCol,CHAR(13),CHAR(10)) AS TextCol
    FROM @tbl
)
,LineWise AS
(
    -- ID is carried through every step so that PIVOT groups per note; without
    -- it all notes in the table would be collapsed into a single row
    SELECT ID
          ,CAST('<x>' + REPLACE((SELECT TextCol AS [*] FOR XML PATH('')),CHAR(10),'</x><x>')  + '</x>' AS XML) AS OneLine
    FROM PlainLineBreak 
)
,EachLine AS
(
    SELECT LineWise.ID
          ,LTRIM(RTRIM(r.value('text()[1]','nvarchar(max)'))) AS LineText
    FROM LineWise
    CROSS APPLY OneLine.nodes('/x[text()]') AS A(r)
)
-- Add p.ID to this list if the note key is needed in the output
SELECT p.[Note]
      ,p.[Phone Call]
      ,p.[Number Called]
      ,p.[Phone Description]
      ,p.[Subject]
      ,p.[Result]
      ,p.[Duration]
FROM
(
    SELECT ID
          ,LEFT(LineText,NULLIF(CHARINDEX(':',LineText),0)-1) AS ColumnName
          ,LTRIM(SUBSTRING(LineText,CHARINDEX(':',LineText)+1,1000)) AS ColumnValue
    FROM EachLine
    -- Lines without a colon are skipped; NULLIF keeps LEFT from receiving -1
    WHERE CHARINDEX(':',LineText) > 0
) AS t
PIVOT
(
    MAX(ColumnValue) FOR ColumnName IN([Note],[Phone Call],[Number Called],[Phone Description],[Subject],[Result],[Duration])
) AS p
ORDER BY p.ID;

结果

+-----------+--------------------------+-----------------+-------------------+-----------------+--------------------+----------+
| Note      | Phone Call               | Number Called   | Phone Description | Subject         | Result             | Duration |
+-----------+--------------------------+-----------------+-------------------+-----------------+--------------------+----------+
| test note | Fairview Wines & Spirits | 1 604  601-8000 | Main              | Arrange meeting | Arranged interview | 00:00:09 |
+-----------+--------------------------+-----------------+-------------------+-----------------+--------------------+----------+

答案 3 :(得分:-1)

最简单的方法可能是:

-- FROM clause only; the SELECT list from the question goes in front of it.
-- NULLIF maps "label not found" (CHARINDEX = 0) to NULL, so a SUBSTRING whose
-- start or length is computed from a missing label yields NULL instead of
-- working from position 0.
FROM AMGR_Notes T CROSS APPLY
     (VALUES (NULLIF(CHARINDEX('Phone Call:', T.TextCol), 0),
              NULLIF(CHARINDEX('Subject:', T.TextCol), 0),
              NULLIF(CHARINDEX('Result:', T.TextCol), 0),
              NULLIF(CHARINDEX('Duration:', T.TextCol), 0)
             )
     ) A(PhoneStart, SubjectStart, ResultStart, DurationStart);