我有一张表,其中有一列用来存放设置,该列的格式如下:
SETTING_NAME = setting_value | SETTING_NAME = setting_value | SETTING_NAME = setting_value
问题在于,填充了哪些设置会有很大差异。我想拆分所有值并以更好的方式存储它们。
目前它看起来像这样:
我希望它变成这样:
为了达到这个目的,我使用了一个函数来拆分这些值。然后我用 UNION 把拆分结果合并在一起,并使用 SUBSTRING 取出每个 setting_name 对应的 setting_value。这是我目前写出的代码:
/*
-- Returns field number @column (1-based) of @text, where fields are
-- delimited by the single character @separator. When @text holds fewer
-- than @column fields the result is an empty string.
create function [dbo].[split_to_columns](@text varchar(8000)
, @column tinyint
, @separator char(1))
returns varchar(8000)
as
begin
    -- one position past the last character; stands in for a missing closing separator
    declare @past_end int = len(@text) + 1
    -- number of fields still to skip, counted down from the requested field number
    declare @remaining int = @column
    declare @field_start int = 1
    declare @field_stop int = charindex(@separator, @text, 1)

    -- hop from separator to separator until the requested field is reached
    -- or the text runs out of separators
    while (@remaining > 1 and @field_stop > 0)
    begin
        select @field_start = @field_stop + 1
             , @remaining = @remaining - 1
        set @field_stop = charindex(@separator, @text, @field_start)
    end

    -- requested field lies beyond the last one: collapse to an empty slice
    if @remaining > 1 set @field_start = @past_end
    -- last field has no trailing separator: it runs to the end of the text
    if @field_stop = 0 set @field_stop = @past_end

    return substring(@text, @field_start, @field_stop - @field_start)
end
*/
-- Demo: turn a 'name=value|name=value' column into one column per setting.
-- Requires dbo.split_to_columns(text, field_number, separator) to exist.
create table #settings(id int, setting varchar(255))
insert into #settings(id, setting) values(1,'setting1=a|setting2=b|setting3=c')
insert into #settings(id, setting) values(2,'setting1=d|setting2=e')
insert into #settings(id, setting) values(3,'setting1=f|setting3=g')
insert into #settings(id, setting) values(4,'setting2=h')
;
-- cte: one row per (id, field position); positions past the last field yield ''.
-- UNION ALL is enough because the aggregation below collapses duplicates.
with cte as (
    select id, dbo.split_to_columns(setting, 1, '|') as setting from #settings
    union all select id, dbo.split_to_columns(setting, 2, '|') from #settings
    union all select id, dbo.split_to_columns(setting, 3, '|') from #settings
)
-- Conditional aggregation reads cte once and cannot fail with
-- "Subquery returned more than 1 value" when a setting name repeats in a row;
-- in that case the greatest value is kept.
select
      id
    , max(case when setting like 'setting1=%' then substring(setting, charindex('=', setting) + 1, 255) end) as setting1
    , max(case when setting like 'setting2=%' then substring(setting, charindex('=', setting) + 1, 255) end) as setting2
    , max(case when setting like 'setting3=%' then substring(setting, charindex('=', setting) + 1, 255) end) as setting3
from cte
group by id
order by id
drop table #settings
我这样做的方式对吗?我不禁觉得自己把事情弄得太复杂了。虽然我并不喜欢这种设置的存储格式,但我确实经常见到它。这意味着还有很多人不得不处理同样的问题……
修改
我将图片属性导入数据库。上面提到的设置是图片属性,id是设置所属图片的名称。
一栏中的设置示例:
FullName = D:\8.jpg | FolderName = D:\ | FileName = 8.jpg | Size = 7284351 | Extension = .jpg | datePictureTaken = 10-3-2017 11:53:38 | ApertureValue = 2 | DateTime = 10-3-2017 11:53:38 | DateTimeDigitized = 10-3-2017 11:53:38 | DateTimeOriginal = 10-3-2017 11:53:38 | ExposureTime = 0,0025706940874036 | FocalLength = 3,65 | GPSAltitude = 43 | GPSDateStamp = 10-3-2017 0:00:00 | Model = QCAM-AA | ShutterSpeedValue = 8604
这就是我希望以上述方式进行格式化的原因。
答案 0 :(得分:2)
我会先把文本转换成一段简单的 XML,这样就可以用基于集合的方法把数据转换成你想要的结果:
-- Demo: shred 'name=value|name=value' strings through XML, then pivot the
-- known setting names into columns.
declare @settings table(id int, setting varchar(255))
insert into @settings (id,setting) values
(1,'setting1=a|setting2=b|setting3=c'),
(2,'setting1=d|setting2=e'),
(3,'setting1=f|setting3=g'),
(4,'setting2=h')

-- Xmlised: rewrites each string as
--   <prob><setting name="NAME">VALUE</setting>...</prob>
-- XML-special characters are escaped first ('&' before the others, so the
-- entities produced are not escaped again); otherwise a value containing
-- & < > or " makes CONVERT(xml, ...) fail. The entities are decoded again
-- by .value() below.
-- Limitation: every '=' is treated as a name/value separator, so each
-- segment must contain exactly one '='.
;with Xmlised (id,detail) as (
    select s.id,CONVERT(xml,'<prob><setting name="' +
        REPLACE(
          REPLACE(escaped.txt,'=','">'),
          '|','</setting><setting name="') + '</setting></prob>')
    from @settings s
    cross apply (values (
        REPLACE(REPLACE(REPLACE(REPLACE(s.setting,'&','&amp;'),'<','&lt;'),'>','&gt;'),'"','&quot;')
    )) escaped(txt)
), shredded as (
    -- one row per <setting> element: (id, name, value)
    select
        x.id,
        S.value('./@name','varchar(50)') as name,
        S.value('./text()[1]','varchar(100)') as value
    from
        Xmlised x
            cross apply
        x.detail.nodes('prob/setting') as T(S)
)
select
    id,setting1,setting2,setting3
from
    shredded
        pivot (MAX(value) for name in (setting1,setting2,setting3)) u
order by
    id
希望我已经把它分解成足够的步骤,你可以看到它正在做什么以及如何做。
结果:
id setting1 setting2 setting3
----------- --------- --------- ---------
1 a b c
2 d e NULL
3 f NULL g
4 NULL h NULL
正如 Sean 在评论中所建议的那样,我通常不会考虑把透视(PIVOT)之后的结果存储下来,一般会跳过这一步。
答案 1 :(得分:0)
WITH很慢。我建议存储设置名称,值和某种组ID的表。例如:
-- One row per setting name/value pair.
-- [group] is described only as "some kind of group id" -- presumably it ties
-- together the settings of one source row; confirm before relying on it.
-- [name]/[value] are variable-length: the original nchar(10) could not hold
-- names such as 'datePictureTaken' (16 characters) or the date values, and it
-- padded shorter values with trailing blanks.
CREATE TABLE [dbo].[settings_table](
    [id] [int] NULL,
    [group] [int] NULL,
    [name] [nvarchar](128) NULL,
    [value] [nvarchar](4000) NOT NULL
) ON [PRIMARY]
我不确切知道你的程序在这些设置中做了什么,但从长远来看这种结构会更有效。
答案 2 :(得分:0)
我会做以下3个步骤:
1)创建一个通用的Split函数。这是我使用的那个:
-- Splits @StringToSplit on @Delimiter and returns one row per non-empty,
-- trimmed item.
--   id   : 1-based position of the item in the input; positions of empty
--          items are skipped, so ids can have gaps.
--   item : the item with leading and trailing blanks removed.
-- A NULL or empty @Delimiter returns the whole (non-empty) input as one item.
CREATE FUNCTION Split(
    @StringToSplit VARCHAR(MAX)
    ,@Delimiter VARCHAR(10)
)
RETURNS @SplitResult TABLE (id int, item VARCHAR(MAX))
BEGIN
    -- VARCHAR(MAX) so items longer than 8000 characters are not truncated
    DECLARE @item VARCHAR(MAX)
    DECLARE @counter int = 1
    -- LEN() ignores trailing blanks; appending a sentinel measures the real length
    DECLARE @delimiterLength int = LEN(@Delimiter + 'x') - 1
    DECLARE @delimiterPos bigint = CHARINDEX(@Delimiter, @StringToSplit, 0)

    WHILE @delimiterPos <> 0
    BEGIN
        -- both expressions read @StringToSplit as it was before this statement
        SELECT
            @item = RTRIM(LTRIM(SUBSTRING(@StringToSplit, 1, @delimiterPos - 1))),
            @StringToSplit = RTRIM(LTRIM(SUBSTRING(@StringToSplit, @delimiterPos + @delimiterLength, LEN(@StringToSplit))))

        IF LEN(@item) > 0
            INSERT INTO @SplitResult SELECT @counter, @item

        SET @counter = @counter + 1
        SET @delimiterPos = CHARINDEX(@Delimiter, @StringToSplit, 0)
    END

    -- whatever follows the last delimiter is the final item
    IF LEN(@StringToSplit) > 0
        INSERT INTO @SplitResult SELECT @counter, @StringToSplit

    RETURN
END
GO
-- You use it like this
SELECT S.id, T.item FROM #settings AS S CROSS APPLY Split(S.setting, '|') AS T
2)拆分设置并将设置名称与其值分开。
-- Step 2: one row per setting item, separated into name and value at the
-- first '='. Requires the Split(text, delimiter) table function and #settings.
SELECT
    S.id,
    T.item,
    SettingName  = RTRIM(LEFT(T.item, E.pos - 1)),                    -- text before the "="
    SettingValue = LTRIM(SUBSTRING(T.item, E.pos + 1, LEN(T.item)))   -- text after the "=", not capped at 100 characters
FROM
    #settings AS S
    CROSS APPLY Split(S.setting, '|') AS T
    -- NULL when the item has no "=": name and value then come back as NULL
    -- instead of SUBSTRING failing on a length of -1
    CROSS APPLY (VALUES (NULLIF(CHARINDEX('=', T.item), 0))) AS E(pos)
3)按名称对已知的设置进行透视(PIVOT):
-- Step 3: pivot the known setting names into one column each.
-- Requires the Split(text, delimiter) table function and #settings.
;WITH SplitValues AS
(
    -- one row per setting item: (id, SettingName, SettingValue)
    SELECT
        S.id,
        SettingName  = RTRIM(LEFT(T.item, E.pos - 1)),                    -- text before the "="
        SettingValue = LTRIM(SUBSTRING(T.item, E.pos + 1, LEN(T.item)))   -- text after the "=", not capped at 100 characters
    FROM
        #settings AS S
        CROSS APPLY Split(S.setting, '|') AS T
        -- NULL when the item has no "=": the row then has a NULL name and
        -- matches no pivot column, instead of SUBSTRING failing on a length of -1
        CROSS APPLY (VALUES (NULLIF(CHARINDEX('=', T.item), 0))) AS E(pos)
)
SELECT
    P.id,
    P.setting1,
    P.setting2,
    P.setting3
FROM
    SplitValues AS SV
    PIVOT (
        -- column names inside PIVOT are written without a table alias
        MAX(SettingValue) FOR SettingName IN ([setting1], [setting2], [setting3])
    ) AS P
ORDER BY
    P.id
答案 3 :(得分:0)
对于这些设置(照片属性),我赞成把它们存成同一行中的多个列,并使用正确的类型,例如 DateTime、Int、Numeric,因为这样可以按范围搜索、排序,而且效率更高。
我知道你问的是 SQL,但我会在 .NET 中做这件事,因为你需要做一些清理工作,比如去掉整数中的逗号。实际使用时可以从文件中逐行读取,并在循环中执行插入命令(INSERT)。
/// <summary>
/// Parses a pipe-delimited "Name = value | Name = value" photo-property string
/// into trimmed name/value pairs and writes each pair to the debug output.
/// </summary>
/// <param name="photo">
/// The property string. When null or empty, a built-in sample string is parsed instead.
/// </param>
/// <exception cref="IndexOutOfRangeException">
/// A non-blank segment has no '=' or has an empty name.
/// </exception>
public static void ParsePhoto(string photo)
{
    if (string.IsNullOrEmpty(photo))
    {
        // demo fallback so the method can be run without input
        photo = @"FullName = D:\8.jpg | FolderName = D:\| FileName = 8.jpg | Size = 7284351 | Extension =.jpg | datePictureTaken = 10 - 3 - 2017 11:53:38 | ApertureValue = 2 | DateTime = 10 - 3 - 2017 11:53:38 | DateTimeDigitized = 10 - 3 - 2017 11:53:38 | DateTimeOriginal = 10 - 3 - 2017 11:53:38 | ExposureTime = 0,0025706940874036 | FocalLength = 3,65 | GPSAltitude = 43 | GPSDateStamp = 10 - 3 - 2017 0:00:00 | Model = QCAM - AA | ShutterSpeedValue = 8,604";
    }

    List<KeyValuePair<string, string>> kvp = new List<KeyValuePair<string, string>>();
    foreach (string s in photo.Trim().Split(new char[] { '|' }, StringSplitOptions.RemoveEmptyEntries))
    {
        // RemoveEmptyEntries keeps whitespace-only segments (e.g. between "| |"); skip them
        if (string.IsNullOrWhiteSpace(s))
        {
            continue;
        }

        // split at the first '=' only, so a value may contain '=' or be empty
        string[] sp = s.Split(new char[] { '=' }, 2);
        string name = sp[0].Trim();
        if (sp.Length == 2 && name.Length > 0)
        {
            kvp.Add(new KeyValuePair<string, string>(name, sp[1].Trim()));
        }
        else
        {
            throw new IndexOutOfRangeException("bad photo");
        }
    }

    foreach (KeyValuePair<string, string> pair in kvp)
    {
        Debug.WriteLine($"{pair.Key} = {pair.Value}");
        //build up and execute insert statement here
    }
    Debug.WriteLine("Done");
}
FullName = D:\8.jpg
FolderName = D:\
FileName = 8.jpg
Size = 7284351
Extension = .jpg
datePictureTaken = 10 - 3 - 2017 11:53:38
ApertureValue = 2
DateTime = 10 - 3 - 2017 11:53:38
DateTimeDigitized = 10 - 3 - 2017 11:53:38
DateTimeOriginal = 10 - 3 - 2017 11:53:38
ExposureTime = 0,0025706940874036
FocalLength = 3,65
GPSAltitude = 43
GPSDateStamp = 10 - 3 - 2017 0:00:00
Model = QCAM - AA
ShutterSpeedValue = 8,604
答案 4 :(得分:0)
如果性能很重要,你可以轻松完成此操作,而无需使用拆分函数、把数据转换成 XML 或进行任何透视(PIVOT)。这种技术通常被称为级联 CROSS APPLY(Cascading CROSS APPLY)。代码有点冗长,但性能上的回报令人惊讶。首先是解决方案:
-- Extracts setting1..setting3 from 'name=value|name=value' strings using only
-- CHARINDEX/SUBSTRING. Requires #settings(id, setting).
--   sN.p : position of the '=' that ends 'settingN=' (NULL when the name is absent)
--   xN.x : length of the value = distance from that '=' to the next '|',
--          or to the end of the string when no '|' follows
SELECT
    t.id,
    setting1 = substring(t.setting, s1.p+1, x1.x),
    setting2 = substring(t.setting, s2.p+1, x2.x),
    setting3 = substring(t.setting, s3.p+1, x3.x)
FROM #settings t
CROSS APPLY (VALUES (nullif(charindex('setting1=', t.setting),0)+8)) s1(p)
CROSS APPLY (VALUES (nullif(charindex('setting2=', t.setting),0)+8)) s2(p)
CROSS APPLY (VALUES (nullif(charindex('setting3=', t.setting),0)+8)) s3(p)
-- with no closing '|' the value runs to len+1; the previous fallback length of 1
-- truncated the last value in a row to a single character
CROSS APPLY (VALUES (isnull(nullif(charindex('|',t.setting,s1.p),0), len(t.setting)+1)-s1.p-1)) x1(x)
CROSS APPLY (VALUES (isnull(nullif(charindex('|',t.setting,s2.p),0), len(t.setting)+1)-s2.p-1)) x2(x)
CROSS APPLY (VALUES (isnull(nullif(charindex('|',t.setting,s3.p),0), len(t.setting)+1)-s3.p-1)) x3(x);
请注意执行计划:
我没有时间进行性能测试,但根据执行计划,级联交叉应用技术的速度大约快44,000倍。
答案 5 :(得分:0)
试试这个:
-- Demo: pull setting1..setting3 out of 'name=value|name=value' strings with
-- CHARINDEX/SUBSTRING only.
declare @table table (id int, setting varchar(100))
insert into @table values
(1,'setting1=a|setting2=b|setting3=c'),
(2,'setting1=d|setting2=e'),
(3,'setting1=f|setting3=g'),
(4,'setting2=h')

-- pN.v is the position of the first character of the value (9 = length of
-- 'settingN='), or NULL when the name is absent. The value runs up to the next
-- '|' or to the end of the string, so it is no longer cut to one character.
select
    t.id,
    case when p1.v is null then null
         else substring(t.setting, p1.v, isnull(nullif(charindex('|', t.setting, p1.v), 0), len(t.setting) + 1) - p1.v)
    end [setting1],
    case when p2.v is null then null
         else substring(t.setting, p2.v, isnull(nullif(charindex('|', t.setting, p2.v), 0), len(t.setting) + 1) - p2.v)
    end [setting2],
    case when p3.v is null then null
         else substring(t.setting, p3.v, isnull(nullif(charindex('|', t.setting, p3.v), 0), len(t.setting) + 1) - p3.v)
    end [setting3]
from @table t
cross apply (values (nullif(charindex('setting1=', t.setting), 0) + 9)) p1(v)
cross apply (values (nullif(charindex('setting2=', t.setting), 0) + 9)) p2(v)
cross apply (values (nullif(charindex('setting3=', t.setting), 0) + 9)) p3(v)