使用SQL来转换和组合字符串

时间:2016-09-09 13:04:59

标签: sql sql-server tsql text

目前,我的数据集结构如下:

static initializer called
instance initializer called
constructor called
instance initializer called
constructor called

一些样本数据:

-- Line-based layout of the notes data: each row holds one numbered line
-- (line_number) of a note, with at most 4000 characters of text per line.
create table notes
(
    date        date          not null,
    author      varchar(100)  not null,
    type        char(1)       not null,  -- single-character type code
    line_number int           not null,  -- position of this line within its note
    note        varchar(4000) not null   -- text of this line
);

此数据将迁移到新的SQL Server结构,其定义如下:
Date, Author, Type, Line Number, Note
2015-01-01, Abe, C, 1, First 4000 character string
2015-01-01, Abe, C, 2, Second 4000 character string
2015-01-01, Abe, C, 3, Third 4000 character string
2015-01-01, Bob, C, 1, First 4000 character string
2015-01-01, Bob, C, 2, Second 1000 character string
2015-01-01, Cal, C, 1, First 3568 character string

我想为多行注释(即合并后超过8000个字符的注释)添加前缀 "日期 - 作者 - Part X of Y // ",并在连接的字符串之间放置一个空格,这样数据最终会像:

-- Combined layout of the notes data: there is no line_number column, and
-- each row's note text may be up to 8000 characters long.
create table notes
(
    date   date          not null,
    author varchar(100)  not null,
    type   char(1)       not null,  -- single-character type code
    note   varchar(8000) not null   -- note text, at most 8000 characters
);

我正在寻找实现这种转换的方法。最初,我有一个中间步骤,只是简单地把 Date、Author、Type 都相同的所有 Note 字符串合并(拼接)在一起,但合并之后无法再进行拆分。

1 个答案:

答案 0 :(得分:1)

好的,这确实是个挑战,但我最终还是做出来了。这也是对我日常工作的一次非常愉快的调剂 :D

代码假设您永远不会有超过72,000个字符的注释,因为用来计算 "Part x of y" 前缀会增加多少额外文本的逻辑假定 x 和 y 都是一位数字。这可以通过给所有一位数字补上前导零来轻松解决,同时也能确保排序正确。

如果您需要对某些部分的解释,代码中的注释应该已经足够:

-- Test data: a table variable holding one row per note line, plus two working
-- columns (final_line, new_lines) that are inserted as NULL here and filled in
-- by the update statement that follows the numbers table.
declare @a table
(
    [Date]      date,
    author      varchar(100),
    type        char(1),
    line_number int,
    note        varchar(8000),
    final_line  int,    -- highest line_number within the row's Date/author/type group
    new_lines   int     -- number of output rows the group's combined note will be split into
);

insert into @a ([Date], author, type, line_number, note, final_line, new_lines)
values
    ('2015-01-01', 'Abel', 'C', 1, 'This is a note that is 100 characters long----------------------------------------------------------', null, null),
    ('2015-01-01', 'Abel', 'C', 2, 'This is a note that is 100 characters long----------------------------------------------------------', null, null),
    ('2015-01-01', 'Abel', 'C', 3, 'This is a note that is 83 characters long------------------------------------------', null, null),
    ('2015-01-01', 'Bob',  'C', 1, 'This is a note that is 100 characters long----------------------------------------------------------', null, null),
    ('2015-01-01', 'Bob',  'C', 2, 'This is a note that is 43 characters long--', null, null),
    ('2015-01-01', 'Cal',  'C', 1, 'This is a note that is 50 characters long---------', null, null);



---------------------------------------
-- Start the actual data processing. --
---------------------------------------

-- Maximum length of one output note.  Set this to the real 8000 character limit;
-- 100 is used here so the test data does not need really long text values.
-- The variable is a decimal, so the divisions by (@MaxFieldLen - prefix length)
-- further down are decimal divisions rather than integer divisions.
declare @MaxFieldLen decimal(10,2);
set @MaxFieldLen = 100;


-- Build #Numbers: a temp table with a single column, Number, holding the
-- integers 0 through 1,000,000.  A permanent numbers table would perform
-- better than rebuilding this one on every run.
if object_id('tempdb..#Numbers') is not null
    drop table #Numbers;

-- Every level cross joins the previous level with itself, squaring its row count:
-- 2 -> 4 -> 16 -> 256 -> 65,536 -> 4,294,967,296 candidate rows.
with rows_2(n)   as (select 1 union all select 1),
     rows_4(n)   as (select 1 from rows_2 as l cross join rows_2 as r),
     rows_16(n)  as (select 1 from rows_4 as l cross join rows_4 as r),
     rows_256(n) as (select 1 from rows_16 as l cross join rows_16 as r),
     rows_64k(n) as (select 1 from rows_256 as l cross join rows_256 as r),
     rows_4g(n)  as (select 1 from rows_64k as l cross join rows_64k as r),
     numbered(n) as (select row_number() over (order by n) from rows_4g)
select seq.n - 1 as Number      -- row_number() starts at 1; shift so the sequence starts at 0
into #Numbers
from numbered as seq
where seq.n <= 1000001;         -- keep only the first 1,000,001 rows



-- Fill in the two working columns for every row of @a.  Both figures are
-- calculated once per (Date, author, type) group and copied onto each row of
-- that group; this has to happen before the notes are combined and split.
;with note_totals as
(
    select src.Date,
           src.author,
           src.type,

           -- Highest line number of the group; it identifies the row that will
           -- carry the fully combined note in the recursive query later on.
           max(src.line_number) as final_line,

           -- Number of output rows the combined note will need.
           case
               -- The combined text is longer than one field: divide the total length
               -- by the room left in a field once the generated prefix is taken off,
               -- and round up.  The trailing '_' stands in for the prefix's final
               -- space, which len() would otherwise not count.
               when sum(len(src.note)) > @MaxFieldLen
                   then ceiling(
                            sum(len(src.note))
                                / (@MaxFieldLen - len(convert(nvarchar(10), src.Date, 121) + ' - ' + src.author + ' - Part x of x //_'))
                        )
               -- The combined text fits into a single field.
               else 1
           end as new_lines
    from @a as src
    group by src.Date,
             src.author,
             src.type
)
update tgt
set final_line = totals.final_line,
    new_lines  = totals.new_lines
from @a as tgt
    inner join note_totals as totals
        on  tgt.Date   = totals.Date
        and tgt.author = totals.author
        and tgt.type   = totals.type;



-- Combine the Notes using a recursive cte, then split each combined note into prefixed parts.
--
--   1. The cte starts at line 1 of every (Date, author, type) group and appends the text of each
--      following line_number.  The row whose line_number equals final_line carries the complete note.
--   2. Each complete note is joined to #Numbers to produce one output row per part.  A part is
--      the generated prefix 'Date - author - Part X of Y // ' followed by the next chunk_len
--      characters of the note, where chunk_len = @MaxFieldLen - prefix length.
--
-- Notes on the implementation:
--   * The running text is varchar(max).  Concatenating two varchar(8000) values is capped at
--     8000 characters, which would silently cut off exactly the notes that need splitting.
--   * The prefix is built from varchar pieces so that a part is never narrowed to nvarchar(4000).
--   * Every part takes the full chunk_len characters, so consecutive parts line up without
--     skipping the character at each boundary.
--   * The prefix length assumes X and Y are single digits ('Part x of x').
--   * The default recursion limit of 100 applies; a group with more than 101 lines needs
--     an option (maxrecursion ...) hint.
;with cte as
(
    -- Anchor: the first line of every group.
    select Date
            ,author
            ,type
            ,line_number
            ,final_line
            ,cast(note as varchar(max)) as note
            ,new_lines
    from @a
    where line_number = 1

    union all

    -- Recursion: append the next line of the same group to the text built so far.
    select a.Date
            ,a.author
            ,a.type
            ,a.line_number
            ,a.final_line
            ,cast(c.note + a.note as varchar(max))      -- anchor and recursive column types must match exactly
            ,a.new_lines
    from cte c
        inner join @a a
            on c.Date = a.Date
                and c.author = a.author
                and c.type = a.type
                and c.line_number+1 = a.line_number

)
select c1.Date
        ,c1.author
        ,c1.type
        ,c2.note
from cte c1
    -- Per-note values that every part of the note shares:
    cross apply (select convert(varchar(10), c1.Date, 121) + ' - ' + c1.author + ' - Part ' as prefix_head
                        -- Characters of note text that fit behind the prefix.  The trailing '_' stands in
                        -- for the prefix's final space, which len() would otherwise not count.
                        ,@MaxFieldLen - len(convert(varchar(10), c1.Date, 121) + ' - ' + c1.author + ' - Part x of x //_') as chunk_len
                ) p
    -- One row per part; Number is the zero-based index of the part.
    cross apply (select n.Number as part_index
                        ,case when c1.new_lines > 1     -- If there is more than one line to be returned, build up the prefix
                            then  p.prefix_head + cast(n.Number+1 as varchar(10)) + ' of ' + cast(c1.new_lines as varchar(10)) + ' // '
                                    + substring(c1.note -- and then append the next chunk_len characters of the note:
                                                ,cast(1 + n.Number * p.chunk_len as int)
                                                ,cast(p.chunk_len as int)
                                                )
                            else c1.note                -- A note that fits into one field is returned as it is.
                            end as note
                    from #Numbers n
                    where n.Number >= 0
                        and n.Number < case when c1.new_lines = 1
                                            then 1
                                            else len(c1.note) / p.chunk_len     -- decimal division: a partial last chunk still gets its own row
                                            end
                ) c2
where c1.line_number = c1.final_line    -- only the row holding the fully combined note
order by c1.Date
        ,c1.author
        ,c1.type
        ,c2.part_index                  -- numeric part order, so part 10 would not sort before part 2