我有一个导入sql server 2008 r2的遗留数据库,它不包含索引,也没有数百个表的主键/外键(有些表有数百列)。我手动识别的主要键(有些是复合键)通常在列顺序中很高,但是如果我能帮助的话,我不会花费数周的时间来完成它们。
是否有任何工具或脚本可用于分析数据的基数,以建议或编写可能的主键?
到目前为止,我已经找到了following script,但它会引发一些错误。我将调试它,看看我是否可以解决出现问题但是想知道是否有人遇到类似的问题并设法使某些工作正常。
链接中的脚本文本位于
之下--Natural Key Finder
--blindman, 6/2005
--This script searches up to 63 columns of any table for permutations of fields that represent unique keys within the dataset.
--The search can be limited by the maximum columns per key, and by the maximum minutes for processing.
declare @TableName varchar(50)
declare @MaxColumns int
declare @MaxMinutes int
--Get a suitable test table from the current database
/*
set @TableName =
(select top 1 sysobjects.name
from sysobjects
inner join sysindexes
on sysobjects.id = sysindexes.id
and indid in (0, 1)
inner join syscolumns on sysobjects.id = syscolumns.id
where sysobjects.type = 'U'
group by sysobjects.name,
sysindexes.rowcnt
having count(*) between 5 and 15
order by rowcnt * count(*) desc)
*/
set @TableName = 'calendarEvents' --Enter the name of the table to analyze. Do NOT enclose it in brackets: []!
set @MaxColumns = 63 --Set the maximum number of columns per key you want to search for.
set @MaxMinutes = 5 --Set a maximum time limit on the search process.
set nocount on
--Procedure variables
declare @RecordCount bigint --The number of records in the table, and the maximum possible cardinality.
declare @ColumnMask bigint --A bitmask representing the presence or absence of columns in a column set.
declare @ColumnString varchar(8000) --A comma-delimited string including all the column names in a column set.
declare @KeyLength int --The permutation size in columns currently being searched.
declare @StartTime datetime --Used to track execution time of the script.
declare @sqlstring varchar(8000) --Dynamically created sql statement.
declare @ProcessGUID char(32) --This random value will be used to name dynamically created database objects.
declare @SampleTableName varchar(50) --This value will hold the name of a dynamically created subset of the table data.
declare @SubMask1 bigint --Bitmask for storing temporary column mask.
declare @SubMask2 bigint --Bitmask for storing temporary column mask.
declare @SubMask3 bigint --Bitmask for storing temporary column mask.
declare @BitMask bigint --Temporary bitmask for stepping through a column mask to find active columns.
declare @Counter int --counts the number permutations analyzed
set @StartTime = getdate()
set @ProcessGUID = replace(newid(), '-', '') --Generate a random string.
set @SampleTableName = 'tbl' + @ProcessGUID --Generate a random table name.
set @Counter = 0
if @MaxColumns > 63 set @MaxColumns = 63
--Create a temp table to hold the record count processed through dynamic SQL
set @RecordCount =
(select rowcnt
from sysobjects
inner join sysindexes
on sysobjects.id = sysindexes.id
where sysobjects.name = @TableName
and sysindexes.indid in (1, 0))
--Create a table for listing and tracking permutations of columns.
create table #ColumnSets
(ColumnSetID int identity,
ColumnMask bigint, --a bitmask value reprenting the presence or absence of columns in a column set.
ColumnCount int, --the number of columns in the set.
Cardinality bigint, --the number of unique values in the column set.
IsUnique int, -- 0: not a naturalkey. 1: natural key.
ColumnString varchar(4000)) --comma-delimited string listing all the columns in the column set.
exec ('CREATE CLUSTERED INDEX IX_' + @ProcessGUID + '_1 ON #ColumnSets (ColumnMask)')
--Create the parent records in the ColumnSets table
insert into #ColumnSets
(ColumnMask,
ColumnCount,
Cardinality,
ColumnString)
select case when colid < 64 then power(cast(2 as bigint), colid-1) else 0 end, --Converts the colid value to a bitmap value.
1,
@RecordCount + 1, --Cardinality is unknown, so estimate 1 higher than the maximum possible cardinality.
'[' + syscolumns.name + ']'
from syscolumns
inner join sysobjects on syscolumns.id = sysobjects.id
where sysobjects.name = @TableName
and syscolumns.prec is not null
and colid < 64
--Informational message
select 'Analyzing table "' + @TableName + '" containing ' + cast(@RecordCount as varchar(20)) + ' records in ' + cast(count(*) as varchar(3)) + ' columns.'
from #ColumnSets
where ColumnCount = 1
--First we will search for large non-primary keys in a sample set of the records. If we find any large permutations that
--are known not to be unique, then we can eliminate any subsets of these permutations from processing later on.
select cast(power(@MaxMinutes, 0.5) -1 as varchar(4)) + ' minutes will be alloted for pre-searching.'
declare @SampleSize int
set @SampleSize = power(@RecordCount, (0.5)) --Sample sqrt(RecordCount)
set @SQLString = 'select top ' + cast(@SampleSize as varchar(10)) + ' * into ' + @SampleTableName + ' from ' + @TableName
exec (@SQLString)
--Start with a columnset including all columns
insert into #ColumnSets
(ColumnMask,
ColumnCount)
select sum(ColumnMask),
count(*)
from #ColumnSets
where ColumnCount = 1
set @KeyLength = (select count(*) from #ColumnSets where ColumnCount = 1)
while exists (select * from #ColumnSets where IsUnique is null and ColumnCount > 1)--isnull((Select max(ColumnCount) from #ColumnSets where IsUnique = 0), 1))
begin
--show status
select 'Largest non-unique permutation found: ' + cast(isnull(max(ColumnCount), 0) as varchar(3)) + ' columns.'
from #ColumnSets
where IsUnique = 0
--If the estimated cardinality is less than the record count, we know the column set cannot
--possibly be unique, so mark it as non-unique.
update #ColumnSets
set IsUnique = 0
where Cardinality < @RecordCount
and IsUnique is null
while exists (select * from #ColumnSets where IsUnique is null and ColumnCount > 1)--isnull((Select max(ColumnCount) from #ColumnSets where IsUnique = 0), 1))
begin
--set @KeyLength = (select max(ColumnCount) from #ColumnSets where IsUnique is null and ColumnCount > 1)
--Get the cardinality of all untested column sets
set @Counter = @Counter +
(Select Count(*)
from #ColumnSets
where IsUnique is null
and ColumnCount = (select max(ColumnCount) from #ColumnSets where IsUnique is null and ColumnCount > 1))
declare CSCursor cursor for
Select ColumnMask,
ColumnString
from #ColumnSets
where IsUnique is null
and ColumnCount = (select max(ColumnCount) from #ColumnSets where IsUnique is null and ColumnCount > 1)
Open CSCursor
Fetch next from CSCursor into @ColumnMask, @ColumnString
while @@FETCH_STATUS = 0
begin
--Create the column string for the bitmask
select @ColumnString = isnull(@ColumnString + ', ', '') + ColumnString
from #ColumnSets
where ColumnCount = 1
and ColumnMask & @ColumnMask = ColumnMask
order by ColumnString
set @SQLString = 'Update #ColumnSets set ColumnString = ''' + @ColumnString + ''', Cardinality = (select count(*) from (select distinct ' + @ColumnString + ' from ' + @SampleTableName + ') DistinctValues), IsUnique = 0 where IsUnique is null and ColumnMask = ' + cast(@ColumnMask as varchar(20))
exec (@SQLString)
fetch next from CSCursor into @ColumnMask, @ColumnString
--Break out of this loop if we have used more than half of the alloted processing time.
if dateadd(mi, power(@MaxMinutes, 0.5), @StartTime) < getdate() break
end
Close CSCursor
Deallocate CSCursor
--Any columnsets with a cardinality equal to the rowcount represent natural keys
Update #ColumnSets set IsUnique = 1 where Cardinality = @SampleSize and ColumnCount > 1
if dateadd(mi, power(@MaxMinutes, 0.5), @StartTime) < getdate() break
--Delete subsets of combinations known not to be unique.
delete CandidateSets
from #ColumnSets CandidateSets
inner join #ColumnSets Supersets on CandidateSets.ColumnMask & Supersets.ColumnMask = CandidateSets.ColumnMask
where SuperSets.IsUnique = 0
and CandidateSets.ColumnCount > 1
and CandidateSets.ColumnCount < SuperSets.ColumnCount
end
if dateadd(mi, power(@MaxMinutes, 0.5) -1, @StartTime) < getdate() break
--Split non-minimal unique sets into three subsets. We will shuffle these subset together
--to creat new permutations for searching.
declare CSCursor cursor for
Select ColumnMask
from #ColumnSets
where IsUnique = 1
and ColumnCount >= 6
Open CSCursor
Fetch next from CSCursor into @ColumnMask
while @@FETCH_STATUS = 0
begin
set @BitMask = 1
set @SubMask1 = 0
set @SubMask2 = 0
set @SubMask3 = 0
--Note that this permutation has already been split
update #ColumnSets set IsUnique = 2 where ColumnMask = @ColumnMask
--split the TargetMask into three distinct masks
while @BitMask < @ColumnMask
begin
while @BitMask < @ColumnMask and @BitMask & @ColumnMask <> @BitMask set @BitMask = @BitMask * 2
set @SubMask1 = @SubMask1 | @BitMask
if @BitMask > @ColumnMask/2 break
set @BitMask = @BitMask * 2
while @BitMask < @ColumnMask and @BitMask & @ColumnMask <> @BitMask set @BitMask = @BitMask * 2
set @SubMask2 = @SubMask2 | @BitMask
if @BitMask > @ColumnMask/2 break
set @BitMask = @BitMask * 2
while @BitMask < @ColumnMask and @BitMask & @ColumnMask <> @BitMask set @BitMask = @BitMask * 2
set @SubMask3 = @SubMask3 | @BitMask
if @BitMask > @ColumnMask/2 break
set @BitMask = @BitMask * 2
end
insert into #ColumnSets
(ColumnMask,
ColumnCount,
Cardinality)
select @SubMask1,
Count(*),
@RecordCount + 1
from #ColumnSets
where ColumnCount = 1
and ColumnMask & @SubMask1 = ColumnMask
insert into #ColumnSets
(ColumnMask,
ColumnCount,
Cardinality)
select @SubMask2,
Count(*),
@RecordCount + 1
from #ColumnSets
where ColumnCount = 1
and ColumnMask & @SubMask2 = ColumnMask
insert into #ColumnSets
(ColumnMask,
ColumnCount,
Cardinality)
select @SubMask3,
Count(*),
@RecordCount + 1
from #ColumnSets
where ColumnCount = 1
and ColumnMask & @SubMask3 = ColumnMask
fetch next from CSCursor into @ColumnMask
end
Close CSCursor
Deallocate CSCursor
--Create larger combinations of non-unique permutations for testing
insert into #ColumnSets
(ColumnMask,
ColumnCount,
Cardinality)
select distinct
ColumnSetsA.ColumnMask | ColumnSetsB.ColumnMask,
ColumnSetsA.ColumnCount + ColumnSetsB.ColumnCount,
@RecordCount + 1
from #ColumnSets ColumnSetsA,
#ColumnSets ColumnSetsB
where ColumnSetsA.ColumnCount + ColumnSetsB.ColumnCount > isnull((Select max(ColumnCount) from #ColumnSets where IsUnique = 0), 0)
and ColumnSetsA.IsUnique = 0
and isnull(ColumnSetsB.IsUnique, 0) = 0
and ColumnSetsA.ColumnMask <> ColumnSetsB.ColumnMask
and ColumnSetsA.ColumnMask & ColumnSetsB.ColumnMask = 0
--Remove duplicates
delete #ColumnSets
from #ColumnSets
left outer join --DistinctSets
(select ColumnMask,
min(ColumnSetID) ColumnSetID
from #ColumnSets
group by ColumnMask) DistinctSets
on #ColumnSets.ColumnMask = DistinctSets.ColumnMask
and #ColumnSets.ColumnSetID = DistinctSets.ColumnSetID
where DistinctSets.ColumnSetID is null
--Delete subsets of combinations known not to be unique.
delete CandidateSets
from #ColumnSets CandidateSets
inner join #ColumnSets Supersets on CandidateSets.ColumnMask & Supersets.ColumnMask = CandidateSets.ColumnMask
where SuperSets.IsUnique = 0
and CandidateSets.ColumnCount > 1
and CandidateSets.ColumnCount < SuperSets.ColumnCount
and CandidateSets.IsUnique is null
--Delete supersets of combinations already known to be unique.
delete CandidateSets
from #ColumnSets CandidateSets
inner join #ColumnSets UniqueSets on CandidateSets.ColumnMask & UniqueSets.ColumnMask = UniqueSets.ColumnMask
where UniqueSets.IsUnique > 0
and CandidateSets.ColumnCount > UniqueSets.ColumnCount
and CandidateSets.IsUnique is null
end
delete CandidateSets
from #ColumnSets CandidateSets
inner join #ColumnSets Supersets on CandidateSets.ColumnMask & Supersets.ColumnMask = CandidateSets.ColumnMask
where SuperSets.IsUnique = 0
and CandidateSets.ColumnCount > 1
and CandidateSets.ColumnCount < SuperSets.ColumnCount
delete
from #ColumnSets
where (IsUnique <> 0 or IsUnique is null)
and ColumnCount > 1
set @SQLString = 'drop table ' + @SampleTableName
exec (@SQLString)
--Now start building permutations of columns and checking them for uniqueness.
set @KeyLength = 1
While @KeyLength <= @MaxColumns
begin
--If there are no more candidates left, then stop looking.
if (select count(*) from #ColumnSets where IsUnique is null) = 0 break
--If the estimated cardinality is less than the record count, we know the column set cannot possibly be unique.
update #ColumnSets
set IsUnique = 0
where Cardinality < @RecordCount
and IsUnique is null
--show status
select ColumnCount,
sum(case when IsUnique is null then 1 else 0 end) as Unknown,
sum(case when IsUnique = 0 then 1 else 0 end) as NotUnique,
sum(case when IsUnique = 1 then 1 else 0 end) as IsUnique
from #ColumnSets
group by ColumnCount
order by ColumnCount
--Get the cardinality of all untested column sets
set @Counter = @Counter +
(Select Count(*)
from #ColumnSets
where Cardinality >= @RecordCount
and IsUnique is null)
declare CSCursor cursor for
Select ColumnMask,
ColumnString
from #ColumnSets
where Cardinality >= @RecordCount
and IsUnique is null
Open CSCursor
Fetch next from CSCursor into @ColumnMask, @ColumnString
while @@FETCH_STATUS = 0
begin
set @SQLString = 'Update #ColumnSets set Cardinality = (select count(*) from (select distinct ' + @ColumnString + ' from ' + @TableName + ') DistinctValues), IsUnique = 0 where IsUnique is null and ColumnMask = ' + cast(@ColumnMask as varchar(20))
exec (@SQLString)
fetch next from CSCursor into @ColumnMask, @ColumnString
if dateadd(mi, @MaxMinutes, @StartTime) < getdate() break
end
Close CSCursor
Deallocate CSCursor
--Delete any ColumnSets with fewer than two distinct values, as they cannot be part of a natural key
Delete from #ColumnSets where Cardinality < 2
--Any columnsets with a cardinality equal to the rowcount represent natural keys.
Update #ColumnSets set IsUnique = 1 where Cardinality = @RecordCount
if dateadd(mi, @MaxMinutes, @StartTime) < getdate() break
set @SQLString = 'select ColumnString as ''' + cast(@KeyLength as varchar(3)) + '-column keys found in ' + cast(datediff(second, @StartTime, getdate()) as varchar(10)) + ' seconds.'' from #ColumnSets where IsUnique = 1 and ColumnCount = ' + cast(@KeyLength as varchar(3)) + ' order by ColumnCount, ColumnString'
exec (@SQLString)
--Remove superfluous permutations (ColumnSet has same cardinality as one of its subsets)
delete ColumnSets
from #ColumnSets ColumnSets
inner join #ColumnSets SubSets
on ColumnSets.Cardinality = SubSets.Cardinality
where ColumnSets.ColumnCount > SubSets.ColumnCount
and ColumnSets.ColumnCount = @KeyLength
and ColumnSets.ColumnMask & SubSets.ColumnMask = SubSets.ColumnMask
--Identify and insert combinations of non-unique permutations that have insufficient potential cardinality to be unique
insert into #ColumnSets
(ColumnMask,
ColumnCount,
Cardinality, --estimate the cardinality as the product of the cardinalities of the component columns.
IsUnique,
ColumnString)
select ColumnSetsA.ColumnMask + ColumnSetsB.ColumnMask as ColumnMask,
ColumnSetsA.ColumnCount + ColumnSetsB.ColumnCount as ColumnCount,
min(ColumnSetsA.Cardinality * ColumnSetsB.Cardinality) as Cardinality,
0 as IsUnique,
min(ColumnSetsA.ColumnString + ', ' + ColumnSetsB.ColumnString) as ColumnString
from #ColumnSets ColumnSetsA
inner join #ColumnSets ColumnSetsB on ColumnSetsB.ColumnCount < ColumnSetsA.ColumnCount
where ColumnSetsA.ColumnCount = @KeyLength
and ColumnSetsA.Cardinality * ColumnSetsB.Cardinality <= @RecordCount
and ColumnSetsA.ColumnMask & ColumnSetsB.ColumnMask = 0
and ColumnSetsA.IsUnique = 0
and ColumnSetsB.IsUnique = 0
group by ColumnSetsA.ColumnMask + ColumnSetsB.ColumnMask,
ColumnSetsA.ColumnCount + ColumnSetsB.ColumnCount
--Insert new column sets
set @KeyLength = @KeyLength + 1
insert into #ColumnSets
(ColumnMask,
ColumnCount,
Cardinality, --estimate the cardinality as the product of the cardinalities of the component columns.
ColumnString)
select ParentSets.ColumnMask | UnarySets.ColumnMask as ColumnMask,
ParentSets.ColumnCount + UnarySets.ColumnCount as ColumnCount,
ParentSets.Cardinality * UnarySets.Cardinality as Cardinality,
ParentSets.ColumnString + ', ' + UnarySets.ColumnString as ColumnString
from #ColumnSets ParentSets
inner join #ColumnSets UnarySets on ParentSets.IsUnique = UnarySets.IsUnique
where ParentSets.IsUnique = 0
and ParentSets.ColumnCount = @KeyLength - 1
and UnarySets.ColumnCount = 1
and ParentSets.ColumnMask & UnarySets.ColumnMask = 0
--Remove duplicates
delete #ColumnSets
from #ColumnSets
left outer join --DistinctSets
(select ColumnMask,
min(ColumnString) ColumnString
from #ColumnSets
group by ColumnMask) DistinctSets
on #ColumnSets.ColumnMask = DistinctSets.ColumnMask
and #ColumnSets.ColumnString = DistinctSets.ColumnString
where DistinctSets.ColumnString is null
--Remove supersets of combinations already known to be unique.
delete CandidateSets
from #ColumnSets CandidateSets
inner join #ColumnSets UniqueSets on CandidateSets.ColumnMask & UniqueSets.ColumnMask = UniqueSets.ColumnMask
where UniqueSets.IsUnique = 1
and CandidateSets.IsUnique is null
--Remove subsets of combinations known not to be unique.
delete CandidateSets
from #ColumnSets CandidateSets
inner join #ColumnSets Supersets on CandidateSets.ColumnMask & Supersets.ColumnMask = CandidateSets.ColumnMask
where SuperSets.IsUnique = 0
and CandidateSets.IsUnique is null
end
-- Make sure that all natural keys present their columns in a uniform alphabetical order
declare CSCursor cursor for
Select ColumnMask
from #ColumnSets
where IsUnique = 1
Open CSCursor
Fetch next from CSCursor into @ColumnMask
while @@FETCH_STATUS = 0
begin
set @ColumnString = null
select @ColumnString = isnull(@ColumnString + ', ', '') + ColumnString
from #ColumnSets
where ColumnCount = 1
and ColumnMask & @ColumnMask = ColumnMask
order by ColumnString
update #ColumnSets set ColumnString = @ColumnString where ColumnMask = @ColumnMask
fetch next from CSCursor into @ColumnMask
end
Close CSCursor
Deallocate CSCursor
--Final output:
select 'Found ' + cast(count(*) as varchar(10)) + ' natural keys with ' + cast(@MaxColumns as varchar(10)) + ' or fewer columns in ' + cast(datediff(minute, @StartTime, getdate()) as varchar(10)) + ' minutes.' from #ColumnSets where IsUnique = 1
select ColumnString as NaturalKeys
from #ColumnSets
where IsUnique = 1
order by ColumnCount, ColumnString
select convert(varchar(20), @Counter) + ' permutations tested.'
drop table #ColumnSets
答案 0 :(得分:3)
如果您正在使用Microsoft套件,则可以使用SSIS。集成服务具有数据分析任务。
作为一种警告,可能需要很长时间,具体取决于表格大小和要包含在候选键中的列数。输出对于识别键的强度非常有用。在您的情况下,您将寻找100%的项目。