仅通过使用SQL脚本检查表内容来查找表关系

时间:2016-09-21 19:21:12

标签: sql

给定两个表,它们之间没有声明任何关系。能否判断其中两列存在未声明关系的概率?

为了说明我的意思:取任意两张相互关联的表,删除它们之间的引用。在数据量足够的前提下,能否仅通过过程语言或一系列SQL脚本检查表内容,推测出原有的关系?

还有另一个与缺失关系相关的问题:Find Missing Relationships using PL/SQL。在那个问题中,用户已经知道缺失关系涉及哪些列;而在我的情况下,我想检测出究竟是哪些列。

可以使用任意类型的数据库(SQL Server、Oracle、MySQL 等)举例说明。

1 个答案:

答案 0 :(得分:0)

我做了如下尝试(PostgreSQL),还有很大的改进空间。

-- Profiles two candidate columns to help judge whether they form an
-- undeclared (foreign-key-like) relationship.  PostgreSQL / PL/pgSQL.
--
-- Usage: edit the six *_name_* variables below, run the DO block, then read
-- the temporary table _x (final SELECT).  _x gets one row per metric:
--
--   "Info"            metric label
--   "<schema>.<table>" (x2) the metric's value for column 1 / column 2, as text
--   "Sql1", "Sql2"    the exact queries that produced the two values
--
-- Metrics, as computed by the generated queries:
--   Count           non-NULL values in the column
--   Distinct Count  distinct non-NULL values in the column
--   Matched         rows produced by the inner join col_1 = col_2
--   Unlinked        non-NULL values with no partner on the other side
--   Multiplicity    largest number of joined rows sharing one key value
--                   (NULL when the join is empty)
--
-- A NOTICE with both values and the first query is raised for every metric.
-- The tag is $body$ rather than $$ because the query templates below contain
-- '$' characters (positional format() specifiers such as %1$s).
DO $body$
DECLARE
  -- Column under test on each side.  Give the names UNQUOTED and in their
  -- exact case; they are quoted safely with format('%I') below.
  schema_name_1 text := 'public';
  table_name_1  text := 'Table1';
  column_name_1 text := 'Id';

  schema_name_2 text := 'public';
  table_name_2  text := 'Table2';
  column_name_2 text := 'Id';

  -- Quoted, ready-to-embed identifiers derived from the names above.
  rel_1 text;
  rel_2 text;
  col_1 text;
  col_2 text;

  -- Parallel arrays: labels[i] is computed by templates_1[i] / templates_2[i].
  -- Template arguments: %1$s = rel_1, %2$s = rel_2, %3$s = col_1, %4$s = col_2.
  labels      text[];
  templates_1 text[];
  templates_2 text[];

  sql_1   text;
  sql_2   text;
  count_1 bigint;
  count_2 bigint;
BEGIN
  rel_1 := format('%I.%I', schema_name_1, table_name_1);
  rel_2 := format('%I.%I', schema_name_2, table_name_2);
  col_1 := format('%I', column_name_1);
  col_2 := format('%I', column_name_2);

  -- Qualify with pg_temp so that only a temporary _x left over from a
  -- previous run is dropped, never a permanent table that happens to be
  -- called _x somewhere on the search_path.
  DROP TABLE IF EXISTS pg_temp._x;

  -- The two value columns are named "<schema>.<table>" after their source.
  -- NOTE(review): if both sides are the same table the two names collide and
  -- this statement fails; pick different column naming for self-references.
  EXECUTE format(
    'CREATE TEMPORARY TABLE _x ("Info" text, %I text, %I text, "Sql1" text, "Sql2" text)',
    schema_name_1 || '.' || table_name_1,
    schema_name_2 || '.' || table_name_2
  );

  labels := ARRAY['Count', 'Distinct Count', 'Matched', 'Unlinked', 'Multiplicity'];

  -- NOTE(review): the two Multiplicity queries group by t2's key and t1's key
  -- respectively, but both run over the same equi-join, where those keys are
  -- equal on every row -- so they appear to always return the same number.
  -- Left unchanged; confirm what per-side multiplicity was intended.
  templates_1 := ARRAY[
    'SELECT count(t1.%3$s) AS count FROM %1$s AS t1;',
    'SELECT count(DISTINCT(t1.%3$s)) AS count FROM %1$s AS t1;',
    'SELECT count(t1.%3$s) AS count FROM %1$s AS t1 JOIN %2$s AS t2 ON t1.%3$s= t2.%4$s;',
    'SELECT count(t1.%3$s) AS count FROM %1$s AS t1 LEFT JOIN %2$s AS t2 ON t1.%3$s= t2.%4$s WHERE t2.%4$s IS NULL;',
    'SELECT MAX(t.count0) AS count FROM (SELECT count(*) AS count0 FROM %1$s AS t1 JOIN %2$s AS t2 ON t1.%3$s= t2.%4$s GROUP BY t2.%4$s) AS t;'
  ];
  templates_2 := ARRAY[
    'SELECT count(t2.%4$s) AS count FROM %2$s AS t2;',
    'SELECT count(DISTINCT(t2.%4$s)) AS count FROM %2$s AS t2;',
    'SELECT count(t2.%4$s) AS count FROM %2$s AS t2 JOIN %1$s AS t1 ON t1.%3$s= t2.%4$s;',
    'SELECT count(t2.%4$s) AS count FROM %2$s AS t2 LEFT JOIN %1$s AS t1 ON t1.%3$s= t2.%4$s WHERE t1.%3$s IS NULL;',
    'SELECT MAX(t.count0) AS count FROM (SELECT count(*) AS count0 FROM %2$s AS t2 JOIN %1$s AS t1 ON t1.%3$s= t2.%4$s GROUP BY t1.%3$s) as t;'
  ];

  FOR i IN 1 .. array_length(labels, 1) LOOP
    sql_1 := format(templates_1[i], rel_1, rel_2, col_1, col_2);
    sql_2 := format(templates_2[i], rel_1, rel_2, col_1, col_2);

    -- Every query is a single ungrouped aggregate, so it yields exactly one
    -- row.  The value itself may be NULL (MAX over an empty join).
    EXECUTE sql_1 INTO count_1;
    EXECUTE sql_2 INTO count_2;

    -- Positional INSERT on purpose: the two middle column names are dynamic.
    -- Values are passed as data, so a NULL count is stored as NULL instead of
    -- being spliced into (and breaking) a dynamically built UPDATE statement.
    INSERT INTO _x VALUES (labels[i], count_1::text, count_2::text, sql_1, sql_2);

    RAISE NOTICE '%, %, %', count_1, count_2, sql_1;
  END LOOP;
END;
$body$;

-- SELECT * is deliberate here: two of the column names depend on the inputs.
SELECT * FROM _x;

输出:

|------------------------------------------------------------------------------------------------------------------------|
|Info             | Table1.Id | Table2.Id |  SQL1                                                                        |
|-----------------+-----------+-----------+------------------------------------------------------------------------------|
|"Count"          |"7386359"  |"12264"    | "SELECT count(t1."Id") AS count FROM public."Table1" AS t1                   |
|-----------------+-----------+-----------+------------------------------------------------------------------------------|
|"Distinct Count" |"7386359"  |"12264"    | "SELECT count(DISTINCT(t1."Id")) AS count FROM public."Table1" AS t1         |
|-----------------+-----------+-----------+------------------------------------------------------------------------------|
|"Matched"        |"12264"    |"12264"    | "SELECT count(t1."Id") AS count FROM public."Table1" AS t1                   |
|                 |           |           |     JOIN public."Table2" AS t2 ON t1."Id"= t2."Id"                           |
|-----------------+-----------+-----------+------------------------------------------------------------------------------|
|"Unlinked"       |"7374095"  |"0"        | "SELECT count(t1."Id") AS count FROM public."Table1" AS t1                   |
|                 |           |           |     LEFT JOIN public."Table2" AS t2 ON t1."Id"= t2."Id" WHERE t2."Id" IS NULL|
|-----------------+-----------+-----------+------------------------------------------------------------------------------|
|"Multiplicity"   |"1"        |"1"        | "SELECT MAX(t.count0) AS count FROM (SELECT count(*) AS count0               |
|                 |           |           |     FROM public."Table1" AS t1 JOIN public."Table2" AS t2 ON t1."Id"= t2."Id"|
|                 |           |           |     GROUP BY t2."Id") AS t                                                   |
|------------------------------------------------------------------------------------------------------------------------|