目前,我的数据集结构如下:
static initializer called
instance initializer called
constructor called
instance initializer called
constructor called
一些样本日期:
CREATE TABLE notes (
date DATE NOT NULL,
author VARCHAR(100) NOT NULL,
type CHAR NOT NULL,
line_number INT NOT NULL,
note VARCHAR(4000) NOT NULL
);
此数据将迁移到定义为:
的新SQL Server结构Date, Author, Type, Line Number, Note
2015-01-01, Abe, C, 1, First 4000 character string
2015-01-01, Abe, C, 2, Second 4000 character string
2015-01-01, Abe, C, 3, Third 4000 character string
2015-01-01, Bob, C, 1, First 4000 character string
2015-01-01, Bob, C, 2, Second 1000 character string
2015-01-01, Cal, C, 1, First 3568 character string
我想为多行添加前缀(组合时超过8000个字符的前缀)注意" 日期 - 作者 - Part Y //"的X ,并在连接的字符串之间放置一个空格,这样数据最终会像:
CREATE TABLE notes (
date DATE NOT NULL,
author VARCHAR(100) NOT NULL,
type CHAR NOT NULL,
note VARCHAR(8000) NOT NULL
);
我正在寻找实现这种转变的方法。最初,我有一个简单组合(合并)所有Note字符串的中间步骤,其中Date,Author,Type共享但无法拆分。
答案 0 :(得分:1)
好的,所以,这是一个挑战,但我最终到了那里。从我的日常工作中分散出来是非常令人愉快的:D
代码假设您永远不会有超过72,000个字符的注释,因为Part x in y
前缀添加了多少额外文本的逻辑假定x
和y
是一位数字。这可以通过用前导零填充任何单个数字来轻松解决,这也可以确保排序正确。
如果您需要解释任何内容,代码中的注释应该足够:
-- Declare the test data:
declare @a table ([Date] date
,author varchar(100)
,type char
,line_number int
,note varchar(8000)
,final_line int
,new_lines int
)
insert into @a values
('2015-01-01','Abel','C',1,'This is a note that is 100 characters long----------------------------------------------------------' ,null,null)
,('2015-01-01','Abel','C',2,'This is a note that is 100 characters long----------------------------------------------------------' ,null,null)
,('2015-01-01','Abel','C',3,'This is a note that is 83 characters long------------------------------------------' ,null,null)
,('2015-01-01','Bob' ,'C',1,'This is a note that is 100 characters long----------------------------------------------------------' ,null,null)
,('2015-01-01','Bob' ,'C',2,'This is a note that is 43 characters long--' ,null,null)
,('2015-01-01','Cal' ,'C',1,'This is a note that is 50 characters long---------' ,null,null)
---------------------------------------
-- Start the actual data processing. --
---------------------------------------
declare @MaxFieldLen decimal(10,2) = 100 -- Set this to your 8000 characters limit you have. I have used 100 so I didn't have to generate and work with really long text values.
-- Create Numbers table. This will perform better if created as a permanent table:
if object_id('tempdb..#Numbers') is not null
drop table #Numbers
;with e00(n) as (select 1 union all select 1)
,e02(n) as (select 1 from e00 a, e00 b)
,e04(n) as (select 1 from e02 a, e02 b)
,e08(n) as (select 1 from e04 a, e04 b)
,e16(n) as (select 1 from e08 a, e08 b)
,e32(n) as (select 1 from e16 a, e16 b)
,cte(n) as (select row_number() over (order by n) from e32)
select n-1 as Number
into #Numbers
from cte
where n <= 1000001
-- Calculate some useful figures to be used in chopping up the total note. This will need to be done across the table before doing anything else:
update @a
set final_line = t.final_line
,new_lines = t.new_lines
from @a a
inner join (select Date
,author
,type
,max(line_number) as final_line -- We only want the final line from the CTE later on, so we need a way of identifying that the line_number we are working with the last one.
-- Calculate the total number of lines that will result from the additional text being added:
,case when sum(len(note)) > @MaxFieldLen -- If the Note is long enough to be broken into two lines:
then ceiling( -- Find the next highest integer value for
sum(len(note)) -- the total length of all the notes
/ (@MaxFieldLen - len(convert(nvarchar(10), Date, 121) + ' - ' + author + ' - Part x of x //_')) -- divided by the max note size allowed minus the length of the additional text.
)
else 1 -- Otherwise return 1.
end as new_lines
from @a
group by Date
,author
,type
) t
on a.Date = t.Date
and a.author = t.author
and a.type = t.type
-- Combine the Notes using a recursive cte:
;with cte as
(
select Date
,author
,type
,line_number
,final_line
,note
,new_lines
from @a
where line_number = 1
union all
select a.Date
,a.author
,a.type
,a.line_number
,a.final_line
,c.note + a.note
,a.new_lines
from cte c
join @a a
on c.Date = a.Date
and c.author = a.author
and c.type = a.type
and c.line_number+1 = a.line_number
)
select c1.Date
,c1.author
,c1.type
,c2.note
from cte c1
cross apply (select case when c1.new_lines > 1 -- If there is more than one line to be returned, build up the prefix:
then convert(nvarchar(10), Date, 121) + ' - ' + author + ' - Part ' + cast(Number+1 as nvarchar(10)) + ' of ' + cast(c1.new_lines as nvarchar(10)) + ' // '
+ substring(c1.note -- and then append the next (Max note length - Generated prefix) number of characters in the note:
,1 + Number * (@MaxFieldLen - len(convert(nvarchar(10), Date, 121) + ' - ' + author + ' - Part x of x //_'))
,(@MaxFieldLen - len(convert(nvarchar(10), Date, 121) + ' - ' + author + ' - Part x of x //_'))-1
)
else c1.note
end as note
from #Numbers
where Number >= 0
and Number < case when c1.new_lines = 1
then 1
else len(c1.note) / (@MaxFieldLen - len(convert(nvarchar(10), Date, 121) + ' - ' + author + ' - Part x of x //_'))
end
) c2
where line_number = final_line
order by 1,2,3,4