查找数据库中结构重复(结构相同)的表

时间:2015-04-02 21:10:43

标签: sql sql-server tsql sql-server-2014-express

我在SQL Server中有1000个表,每个表都是从CSV文件创建的。每个表中的数据都是相似的,每个表代表不同的一天。

我遇到的问题是,表的结构和列的名称有很多变化。

不过,其中有些表的结构是完全一致的。我认为合并数据的一个很好的起点,就是先把这些结构相同的表中的数据合并到一起。

我一直在寻找一种查询数据库的方法,以找出这些结构相同的表,但尚未成功。

非常感谢任何帮助。

4 个答案:

答案 0 :(得分:1)

以下代码检查各表的列数是否完全相同,以及列类型是否匹配。请注意,列的顺序并不重要。例如,如果您有两个这样的表:

Table01
Column01 INT
Column02 BIT

Table02
Column01 BIT
Column02 INT

将会匹配相同的结构。


下面的代码很简单 - 我们正在为每个表创建CSV列表及其列类型。

-- Builds a "type signature" for every row of sys.tables: the table name plus
-- a comma-separated list of the system_type_id of each of its columns.
-- The ids are sorted inside the list, so two tables that hold the same column
-- types in a different column order still produce the same signature.
DECLARE @DataSource TABLE
(
    [name]  SYSNAME,
    [value] VARCHAR(MAX)
);

INSERT INTO @DataSource
(
    [name],
    [value]
)
SELECT
    tbl.[name],
    -- FOR XML PATH('') concatenates the per-column fragments into one string;
    -- STUFF then removes the leading comma.
    STUFF
    (
        (
            SELECT ',' + CONVERT(VARCHAR(12), col.[system_type_id])
            FROM [sys].[columns] AS col
            WHERE col.[object_id] = tbl.[object_id]
            ORDER BY col.[system_type_id]
            FOR XML PATH(''), TYPE
        ).value('.', 'VARCHAR(MAX)'),
        1,
        1,
        ''
    )
FROM [sys].[tables] AS tbl;

对该表变量执行 SELECT 的结果如下所示:

enter image description here

现在,我们再做一次同样的拼接,但这次是按列类型 CSV 列表进行分组,把具有相同列表的表名拼在一起:

-- Groups the tables recorded in @DataSource by their column-type signature.
-- Returns one row per distinct signature:
--   1st column: the signature (comma-separated system_type_id list)
--   2nd column: comma-separated, alphabetically sorted names of every table
--               that has exactly this signature
-- The signatures are de-duplicated before the names are collected; driving
-- the query straight from @DataSource would return the same row once for
-- every table in the group.
SELECT Signatures.[value]
      ,NamesCSV.[value]
FROM
(
    SELECT DISTINCT [value]
    FROM @DataSource
) Signatures
CROSS APPLY
(
    SELECT STUFF
    (
        (
            SELECT ',' + D.[name]
            FROM @DataSource D
            WHERE Signatures.[value] = D.[value]
            ORDER BY D.[name]
            FOR XML PATH(''), TYPE
        ).value('.', 'VARCHAR(MAX)')
        ,1  -- strip the leading comma
        ,1
        ,''
    )
) NamesCSV ([value]);

我正在AdventureWorks2012数据库中测试此代码,它实际上找到了匹配的表:

enter image description here

当然,这只是一个起点,您还可以比较其他属性。例如,可以在每个列类型 ID 后面附加该列是 NULL 还是 NOT NULL 的信息:

TYPEID|NOTNULL,TYPEID|NULL...

答案 1 :(得分:1)

您可以在信息视图INFORMATION_SCHEMA.COLUMNS中找到大量数据。

这将为您(除其他外)提供表名,列顺序,列名和列定义。

所以,例如,你可以这样做:

-- Lists every pair of tables whose columns agree, position by position, on
-- data type and character length. Column names are not compared.
;with table_pairs as (
    -- Every unordered pair of table names, listed once with the alphabetically
    -- smaller name first. Narrow this CTE if only some tables are plausible
    -- candidates for being similar.
    select    lhs.table_name
         ,    rhs.TABLE_NAME as other_table_Name
    from    INFORMATION_SCHEMA.TABLES lhs
        join INFORMATION_SCHEMA.TABLES rhs
            on    rhs.TABLE_NAME > lhs.TABLE_NAME
)
select   *
from     table_pairs
where    not exists (
    -- Reject the pair if the first table has a column ...
    select  1
    from    INFORMATION_SCHEMA.COLUMNS first_col
    where   first_col.TABLE_NAME = table_pairs.TABLE_NAME
        and not exists (
            -- ... with no equivalent column at the same position in the second.
            select  1
            from    INFORMATION_SCHEMA.COLUMNS second_col
            where   second_col.TABLE_NAME = table_pairs.other_table_Name
               and  second_col.ORDINAL_POSITION = first_col.ORDINAL_POSITION
               and  second_col.DATA_TYPE = first_col.DATA_TYPE
               -- INTERSECT treats two NULLs as equal, so this is true when the
               -- lengths are equal or both NULL.
               and  exists (
                        select first_col.CHARACTER_MAXIMUM_LENGTH
                        intersect
                        select second_col.CHARACTER_MAXIMUM_LENGTH
                    )
        )
    )
    and    not exists (
    -- Same check in the other direction: reject the pair if the second table
    -- has a column ...
        select  1
        from    INFORMATION_SCHEMA.COLUMNS second_col
        where   second_col.TABLE_NAME = table_pairs.other_table_Name
            and not exists (
                -- ... with no equivalent column at the same position in the first.
                select  1
                from    INFORMATION_SCHEMA.COLUMNS first_col
                where   first_col.TABLE_NAME = table_pairs.TABLE_NAME
                   and  first_col.ORDINAL_POSITION = second_col.ORDINAL_POSITION
                   and  first_col.DATA_TYPE = second_col.DATA_TYPE
                   and  exists (
                            select second_col.CHARACTER_MAXIMUM_LENGTH
                            intersect
                            select first_col.CHARACTER_MAXIMUM_LENGTH
                        )
            )
    );

答案 2 :(得分:1)

我对 information_schema.columns 中的一组列计算校验和,再按表求和。这样可以得到一个表的列表,其中任何具有相同“幻数”(校验和之和)的表就是相互匹配的。

-- Fingerprints every object listed in INFORMATION_SCHEMA.COLUMNS and reports
-- the ones that share a fingerprint with at least one other object.
--
-- The fingerprint ("magic number") is the sum, over all columns of a table,
-- of CHECKSUM(name, default, nullability, type, length, precision, scale).
-- Because it is a sum, it does not depend on column order. CHECKSUM is not
-- collision-free, so equal fingerprints mark candidates that should still be
-- confirmed by comparing the column definitions.
--
-- The fingerprints are computed in one set-based statement. A row-by-row
-- cursor that advances with "schema + '.' + name > previous" can skip tables,
-- because that concatenated comparison does not always sort the same way as
-- ORDER BY schema, name, and it fails on an empty catalog.
if OBJECT_ID('tempdb..#alltables','U') is not null
    drop table #alltables;

create table #alltables (schema_name sysname,
                   table_name sysname,
                   magicnum decimal(18,0));

insert into #alltables (schema_name, table_name, magicnum)
select   c.TABLE_SCHEMA,
         c.TABLE_NAME,
         -- cast before summing so the total cannot overflow int
         SUM(CAST(
           CHECKSUM (c.COLUMN_NAME, c.COLUMN_DEFAULT, c.IS_NULLABLE,
                     c.DATA_TYPE, c.CHARACTER_MAXIMUM_LENGTH,
                     c.NUMERIC_PRECISION, c.NUMERIC_SCALE, c.DATETIME_PRECISION)
         as decimal(18,0)))
from     INFORMATION_SCHEMA.COLUMNS c
group by c.TABLE_SCHEMA, c.TABLE_NAME;

-- Keep only fingerprints that occur on more than one table.
with fingerprints as (
    select schema_name,
           table_name,
           magicnum,
           COUNT(*) over (partition by magicnum) as tables_sharing
    from   #alltables
)
select   schema_name, table_name, magicnum
from     fingerprints
where    tables_sharing > 1
order by magicnum, table_name;

答案 3 :(得分:1)

如果要找的是结构完全相同的表,可以试试下面的办法。我实际上是用它来生成所需的 INSERT 语句的;如果需要,生成的脚本还可以顺便删除合并后的旧表。

-- Demo fixture: five sample tables. table1, table2 and table5 share one
-- structure; table3 and table4 share another (no AvgScore column).
-- Any previous copies are dropped first so the script can be re-run.
IF OBJECT_ID('dbo.table1') IS NOT NULL
    DROP TABLE dbo.table1;
IF OBJECT_ID('dbo.table2') IS NOT NULL
    DROP TABLE dbo.table2;
IF OBJECT_ID('dbo.table3') IS NOT NULL
    DROP TABLE dbo.table3;
IF OBJECT_ID('dbo.table4') IS NOT NULL
    DROP TABLE dbo.table4;
IF OBJECT_ID('dbo.table5') IS NOT NULL
    DROP TABLE dbo.table5;

-- table1
CREATE TABLE table1
(
    ID        INT,
    FirstName VARCHAR(25),
    LastName  NVARCHAR(25),
    EntryDate DATETIME,
    AvgScore  NUMERIC(18,6)
);

-- matches table1
CREATE TABLE table2
(
    ID        INT,
    FirstName VARCHAR(25),
    LastName  NVARCHAR(25),
    EntryDate DATETIME,
    AvgScore  NUMERIC(18,6)
);

-- table3
CREATE TABLE table3
(
    ID        INT,
    FirstName VARCHAR(25),
    LastName  NVARCHAR(25),
    EntryDate DATETIME
);

-- matches table3
CREATE TABLE table4
(
    ID        INT,
    FirstName VARCHAR(25),
    LastName  NVARCHAR(25),
    EntryDate DATETIME
);

-- matches table1
CREATE TABLE table5
(
    ID        INT,
    FirstName VARCHAR(25),
    LastName  NVARCHAR(25),
    EntryDate DATETIME,
    AvgScore  NUMERIC(18,6)
);




-- Generates one "INSERT INTO <first> SELECT * FROM <dup>; DROP TABLE <dup>;"
-- statement for every base table whose structure is identical to that of an
-- alphabetically earlier table. Two tables match when they have the same
-- number of columns and, position by position, the same column name, data
-- type, nullability, length, precision, scale and datetime precision.
--
-- The generated statements are only returned, not executed. Review them
-- before running: they drop tables, and table names are emitted exactly as
-- stored (no schema prefix, no quoting), so they assume the tables resolve in
-- the caller's default schema and need no bracket quoting.
WITH CTE_table_columns
AS
(
    -- One row per column of every base table. Views are left out because the
    -- generated script issues DROP TABLE.
    -- Position and column count are derived with ROW_NUMBER / COUNT per
    -- schema-qualified table rather than taken from ORDINAL_POSITION: that
    -- value is a column identification number and can have gaps (e.g. after a
    -- column was dropped), which would make MAX(ORDINAL_POSITION) overstate
    -- the column count.
    SELECT
            C.TABLE_SCHEMA,
            C.TABLE_NAME,
            C.COLUMN_NAME,
            C.DATA_TYPE,
            C.IS_NULLABLE,
            C.CHARACTER_MAXIMUM_LENGTH,
            C.NUMERIC_PRECISION,
            C.NUMERIC_SCALE,
            C.DATETIME_PRECISION,
            ROW_NUMBER() OVER (PARTITION BY C.TABLE_SCHEMA, C.TABLE_NAME
                               ORDER BY C.ORDINAL_POSITION) AS column_position,
            COUNT(*)     OVER (PARTITION BY C.TABLE_SCHEMA, C.TABLE_NAME) AS total_columns
    FROM        INFORMATION_SCHEMA.COLUMNS C
    INNER JOIN  INFORMATION_SCHEMA.TABLES T
    ON      T.TABLE_SCHEMA = C.TABLE_SCHEMA
        AND T.TABLE_NAME = C.TABLE_NAME
    WHERE   T.TABLE_TYPE = 'BASE TABLE'
),
CTE_matching_Tables
AS
(
    -- Every pair (primaryTable < matchedTable) in which all columns match.
    SELECT
            A.TABLE_NAME AS primaryTable,
            A.total_columns,
            COUNT(*) AS matching_columns,
            B.TABLE_NAME AS matchedTable
    FROM        CTE_table_columns A
    INNER JOIN  CTE_table_columns B
    ON      A.TABLE_NAME < B.TABLE_NAME
        AND A.column_position = B.column_position
        AND A.total_columns = B.total_columns
        AND A.COLUMN_NAME = B.COLUMN_NAME
        AND A.DATA_TYPE = B.DATA_TYPE
        AND A.IS_NULLABLE = B.IS_NULLABLE
        -- The remaining attributes are NULL for types they do not apply to,
        -- so each must be either equal or NULL on both sides.
        AND (       (A.CHARACTER_MAXIMUM_LENGTH = B.CHARACTER_MAXIMUM_LENGTH)
                OR  (A.CHARACTER_MAXIMUM_LENGTH IS NULL AND B.CHARACTER_MAXIMUM_LENGTH IS NULL)
            )
        AND (       (A.NUMERIC_PRECISION = B.NUMERIC_PRECISION)
                OR  (A.NUMERIC_PRECISION IS NULL AND B.NUMERIC_PRECISION IS NULL)
            )
        AND (       (A.NUMERIC_SCALE = B.NUMERIC_SCALE)
                OR  (A.NUMERIC_SCALE IS NULL AND B.NUMERIC_SCALE IS NULL)
            )
        AND (       (A.DATETIME_PRECISION = B.DATETIME_PRECISION)
                OR  (A.DATETIME_PRECISION IS NULL AND B.DATETIME_PRECISION IS NULL)
            )
    -- Schemas are part of the grouping so that same-named tables in different
    -- schemas are counted separately.
    GROUP BY A.TABLE_SCHEMA,A.TABLE_NAME,A.total_columns,B.TABLE_SCHEMA,B.TABLE_NAME
    -- The pair matches only if every column found a partner.
    HAVING A.total_columns = COUNT(*)
)

--CTE has all table matches. For each matchedTable, pick the lowest primaryTable.
    --That way table2 and table5 both insert into table1, even though table2 and table5 also match each other
SELECT  'INSERT INTO ' + MIN(primaryTable) + ' SELECT * FROM ' + matchedTable + '; DROP TABLE ' + matchedTable + ';'
FROM CTE_matching_Tables
GROUP BY matchedTable
ORDER BY matchedTable;

结果:

INSERT INTO table1 SELECT * FROM table2; DROP TABLE table2;
INSERT INTO table3 SELECT * FROM table4; DROP TABLE table4;
INSERT INTO table1 SELECT * FROM table5; DROP TABLE table5;