为了修剪生产数据库以便在测试系统中加载,我们删除了许多表中的行。现在这让我们陷入了几个表格,即不再用于任何FK关系的行。我想要实现的就像Java中的垃圾收集。
或者换句话说:如果我在数据库中有M个表。它们中的N个(即大多数但不是全部)具有外键关系。我通过SQL删除了几个高级行(即只有传出FK关系的行)。这样就只在相关表中留下了行。
有人有SQL存储过程或Java程序找到N个表,然后遵循所有FK关系来删除不再需要的行。
如果发现N个表太复杂,我可能会为脚本提供要扫描的表列表,或者最好是要忽略的表的负列表。
另请注意:A、B、C……都使用 {{1}} 中的行。

答案 0 :(得分:5)
此问题在 MySQL 性能博客的一篇文章中已有讨论:http://www.percona.com/blog/2011/11/18/eventual-consistency-in-mysql/
作者提供了以下元查询,用于生成能够找出孤立节点(即父行已不存在的子行)的查询:
-- Meta-query: returns one row per foreign-key constraint. The _SQL column of
-- each row holds a generated SELECT that lists the primary-key values of child
-- rows whose foreign key points at a parent row that does not exist (orphans).
--
--   K = the foreign-key columns (child side and the columns they reference).
--   P = the primary-key columns of the child table; used to label the output.
--
-- Child tables without a PRIMARY key are skipped because of the INNER JOIN.
-- In the generated statement the child table is aliased by its constraint name
-- and the parent table by its own name.
--
-- Child rows whose foreign-key columns are NULL reference nothing, so the
-- generated WHERE clause excludes them instead of reporting them as orphans.
--
-- Every non-aggregated column is listed in GROUP BY so the statement also runs
-- with ONLY_FULL_GROUP_BY enabled and constraints that share a name in
-- different schemas are not merged into one group.
--
-- NOTE: GROUP_CONCAT output is limited by group_concat_max_len; raise that
-- setting if a generated statement comes back truncated.
SELECT CONCAT(
    'SELECT ', GROUP_CONCAT(DISTINCT CONCAT(K.CONSTRAINT_NAME, '.', P.COLUMN_NAME,
        ' AS `', P.TABLE_SCHEMA, '.', P.TABLE_NAME, '.', P.COLUMN_NAME, '`') ORDER BY P.ORDINAL_POSITION), ' ',
    'FROM ', K.TABLE_SCHEMA, '.', K.TABLE_NAME, ' AS ', K.CONSTRAINT_NAME, ' ',
    'LEFT OUTER JOIN ', K.REFERENCED_TABLE_SCHEMA, '.', K.REFERENCED_TABLE_NAME, ' AS ', K.REFERENCED_TABLE_NAME, ' ',
    ' ON (', GROUP_CONCAT(CONCAT(K.CONSTRAINT_NAME, '.', K.COLUMN_NAME) ORDER BY K.ORDINAL_POSITION),
    ') = (', GROUP_CONCAT(CONCAT(K.REFERENCED_TABLE_NAME, '.', K.REFERENCED_COLUMN_NAME) ORDER BY K.ORDINAL_POSITION), ') ',
    -- Any referenced column is NULL exactly when the outer join found no parent;
    -- MIN() just makes the choice deterministic for multi-column keys.
    'WHERE ', K.REFERENCED_TABLE_NAME, '.', MIN(K.REFERENCED_COLUMN_NAME), ' IS NULL',
    ' AND ', GROUP_CONCAT(CONCAT(K.CONSTRAINT_NAME, '.', K.COLUMN_NAME, ' IS NOT NULL')
        ORDER BY K.ORDINAL_POSITION SEPARATOR ' AND '), ';'
) AS _SQL
FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE K
INNER JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE P
    ON (K.TABLE_SCHEMA, K.TABLE_NAME) = (P.TABLE_SCHEMA, P.TABLE_NAME)
    AND P.CONSTRAINT_NAME = 'PRIMARY'
WHERE K.REFERENCED_TABLE_NAME IS NOT NULL
GROUP BY
    K.TABLE_SCHEMA,
    K.TABLE_NAME,
    K.CONSTRAINT_NAME,
    K.REFERENCED_TABLE_SCHEMA,
    K.REFERENCED_TABLE_NAME;
我把它改写成查找“没有子行的父行”的版本,得到:
-- Meta-query: returns one row per foreign-key constraint. The _SQL column of
-- each row holds a generated SELECT that lists the referenced-key values of
-- parent rows that no child row points at through that particular constraint.
--
-- In the generated statement the parent table is aliased by its own name and
-- the child table by the constraint name.
--
-- Only the foreign-key metadata (K) is needed here. No primary-key columns are
-- selected, so there is no join to the child table's PRIMARY key; such a join
-- would only repeat the selected columns once per primary-key column and
-- silently drop constraints whose child table has no primary key.
--
-- Every non-aggregated column is listed in GROUP BY so the statement also runs
-- with ONLY_FULL_GROUP_BY enabled and constraints that share a name in
-- different schemas are not merged into one group.
--
-- NOTE: a parent row can be childless for one constraint yet still be
-- referenced through another; each generated query looks at one constraint only.
SELECT CONCAT(
    'SELECT ', GROUP_CONCAT(CONCAT(K.REFERENCED_TABLE_NAME, '.', K.REFERENCED_COLUMN_NAME) ORDER BY K.ORDINAL_POSITION), ' ',
    'FROM ', K.REFERENCED_TABLE_SCHEMA, '.', K.REFERENCED_TABLE_NAME, ' AS ', K.REFERENCED_TABLE_NAME, ' ',
    'LEFT OUTER JOIN ', K.TABLE_SCHEMA, '.', K.TABLE_NAME, ' AS ', K.CONSTRAINT_NAME, ' ',
    ' ON (', GROUP_CONCAT(CONCAT(K.CONSTRAINT_NAME, '.', K.COLUMN_NAME) ORDER BY K.ORDINAL_POSITION),
    ') = (', GROUP_CONCAT(CONCAT(K.REFERENCED_TABLE_NAME, '.', K.REFERENCED_COLUMN_NAME) ORDER BY K.ORDINAL_POSITION), ') ',
    -- A matched child row has non-NULL key columns (they compared equal to the
    -- parent's), so any child key column being NULL means "no child found".
    -- MIN() just makes the choice deterministic for multi-column keys.
    'WHERE ', K.CONSTRAINT_NAME, '.', MIN(K.COLUMN_NAME), ' IS NULL;'
) AS _SQL
FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE K
WHERE K.REFERENCED_TABLE_NAME IS NOT NULL
GROUP BY
    K.TABLE_SCHEMA,
    K.TABLE_NAME,
    K.CONSTRAINT_NAME,
    K.REFERENCED_TABLE_SCHEMA,
    K.REFERENCED_TABLE_NAME;
答案 1 :(得分:3)
由于即使是简单的存储过程通常也有点难看,这成了一次有趣的练习:把存储过程推到远远超出“容易看懂”的程度。
要使用下面的代码,请启动 MySQL shell,用 USE 切换到
目标数据库,粘贴下面这一大段存储过程定义,然后执行
CALL delete_orphans_from_all_tables();
删除数据库中所有表的所有孤立行。
先从宏观上概述一下:
delete_orphans_from_all_tables 是入口点。其他所有存储过程都以 dofat 为前缀,以表明它们与 delete_orphans_from_all_tables 相关,也让这些散落在数据库里的过程不那么碍眼。
delete_orphans_from_all_tables 通过反复调用 dofat_delete_orphans_from_all_tables_iter 来工作,直到没有更多的行可删除为止。
dofat_delete_orphans_from_all_tables_iter 会遍历所有作为外键约束目标的表,并从每个表中删除当前未被任何地方引用的行。
以下是代码:
delimiter //
CREATE PROCEDURE dofat_store_tables_targeted_by_foreign_keys ()
BEGIN
    -- (Re)creates the temporary table TargetTableNames and fills it with the
    -- name of every table in the current database (as reported by DATABASE())
    -- that is the target of at least one foreign-key relation.
    --
    -- DATABASE() can be used directly in a static statement, so no
    -- PREPARE/EXECUTE is needed; this avoids leaving an un-deallocated
    -- prepared statement and a stray user variable behind in the session.
    DROP TEMPORARY TABLE IF EXISTS TargetTableNames;
    CREATE TEMPORARY TABLE TargetTableNames (
        table_name VARCHAR(255) NOT NULL
    );
    INSERT INTO TargetTableNames (table_name)
    SELECT DISTINCT referenced_table_name
    FROM INFORMATION_SCHEMA.key_column_usage
    WHERE referenced_table_schema = DATABASE();
END//
CREATE PROCEDURE dofat_deletion_clause_for_table(
    IN table_name VARCHAR(255), OUT result text
)
BEGIN
    -- Builds the WHERE-clause text that selects the unreferenced rows of
    -- `table_name`.
    --
    -- Given a table Foo in database db, where Foo.col1 is referenced by
    -- Bar.col1 and Foo.col2 is referenced by Qwe.col3, `result` is set to a
    -- string like:
    --
    --   NOT (`Foo`.`col1` IN (SELECT `col1` FROM `db`.`Bar`) <=> 1) AND
    --   NOT (`Foo`.`col2` IN (SELECT `col3` FROM `db`.`Qwe`) <=> 1)
    --
    -- `result` is NULL when no foreign key references the table.
    --
    -- The odd-looking `NOT (x IN y <=> 1)` construct is used in favour of the
    -- more obvious (x NOT IN y) construct to handle nulls properly; note that
    -- (x NOT IN y) will evaluate to NULL if either x is NULL or if x is not in
    -- y and *any* value in y is NULL.
    --
    -- Identifiers are back-tick quoted and the referencing table is qualified
    -- with its schema so reserved words, unusual names and cross-schema
    -- references produce valid SQL.
    --
    -- Not declared DETERMINISTIC: the result depends on the current contents
    -- of INFORMATION_SCHEMA, not only on the argument.
    --
    -- Side effects: sets the user variables @db_name, @table_name and @result.
    -- @table_name is kept deliberately because dofat_delete_orphans_from_table
    -- may read it after calling this procedure.
    --
    -- NOTE(review): a self-referencing foreign key yields a sub-select on the
    -- table being deleted from, which MySQL may reject; not handled here.

    -- GROUP_CONCAT silently truncates at group_concat_max_len (1024 bytes by
    -- default). A truncated clause would be invalid or, worse, would drop
    -- conditions, so the limit is raised for the duration of the query.
    -- NOTE: the previous value is not restored if the query raises an error.
    DECLARE saved_max_len BIGINT UNSIGNED DEFAULT @@SESSION.group_concat_max_len;

    SET @db_name = DATABASE();
    SET @table_name = table_name;
    SET SESSION group_concat_max_len = GREATEST(saved_max_len, 1048576);

    -- Inside the statement text, `table_name`, `table_schema`, `column_name`
    -- and `referenced_column_name` are columns of key_column_usage (the
    -- referencing side); the procedure parameter is only reachable through
    -- @table_name.
    PREPARE stmt FROM
       'SELECT GROUP_CONCAT(
            CONCAT(
                \'NOT (`\', @table_name, \'`.`\', referenced_column_name, \'` IN (\',
                \'SELECT `\', column_name, \'` FROM `\', table_schema, \'`.`\', table_name, \'`)\',
                \' <=> 1)\'
            )
            SEPARATOR \' AND \'
        ) INTO @result
        FROM INFORMATION_SCHEMA.key_column_usage
        WHERE
            referenced_table_schema = ?
            AND referenced_table_name = ?';
    EXECUTE stmt USING @db_name, @table_name;
    DEALLOCATE PREPARE stmt;

    SET SESSION group_concat_max_len = saved_max_len;
    SET result = @result;
END//
CREATE PROCEDURE dofat_delete_orphans_from_table (table_name varchar(255))
BEGIN
    -- Takes as an argument the name of a table that is the target of at least
    -- one foreign key.
    -- Deletes from that table all rows that are not currently referenced by
    -- any foreign key.
    --
    -- The DELETE is built from this procedure's own `table_name` parameter
    -- rather than from a user variable that happens to be set by
    -- dofat_deletion_clause_for_table, so it does not depend on that
    -- procedure's side effects.
    --
    -- Side effects: sets the user variables @deletion_clause and @stmt and
    -- leaves the prepared statement `stmt` allocated (see below).
    CALL dofat_deletion_clause_for_table(table_name, @deletion_clause);

    -- Without any referencing foreign key there is no clause to apply.
    -- CONCAT() would then yield NULL and PREPARE would fail with an obscure
    -- syntax error, so report the real problem instead.
    IF @deletion_clause IS NULL THEN
        SIGNAL SQLSTATE '45000'
            SET MESSAGE_TEXT = 'dofat_delete_orphans_from_table: table is not referenced by any foreign key';
    END IF;

    SET @stmt = CONCAT(
        'DELETE FROM `', REPLACE(table_name, '`', '``'),
        '` WHERE ', @deletion_clause
    );
    PREPARE stmt FROM @stmt;
    -- EXECUTE must stay the LAST statement of this procedure: the caller
    -- reads ROW_COUNT() right after the CALL to learn how many rows the DELETE
    -- removed, and any later statement (even DEALLOCATE PREPARE) would
    -- overwrite that count.
    EXECUTE stmt;
END//
CREATE PROCEDURE dofat_delete_orphans_from_all_tables_iter(
    OUT rows_deleted INT
)
BEGIN
    -- dofat_store_tables_targeted_by_foreign_keys must be called before this
    -- will work (it creates and fills TargetTableNames).
    --
    -- Loops ONCE over all tables listed in TargetTableNames. For each table,
    -- deletes all rows that are not currently referenced, and returns the
    -- total number of deleted rows in `rows_deleted`.
    -- Note that this is not guaranteed to leave all tables without orphans,
    -- since the deletion of rows from a table late in the sequence may leave
    -- rows from a table early in the sequence orphaned.
    DECLARE loop_done BOOL DEFAULT FALSE;
    -- Variable name needs to differ from the column name we use to populate it
    -- because of bug http://bugs.mysql.com/bug.php?id=28227
    DECLARE table_name_ VARCHAR(255);
    DECLARE curs CURSOR FOR SELECT table_name FROM TargetTableNames;
    DECLARE CONTINUE HANDLER FOR NOT FOUND SET loop_done = TRUE;

    SET rows_deleted = 0;
    OPEN curs;
    table_loop: LOOP
        -- Reset the flag so only THIS fetch can end the loop, not a NOT FOUND
        -- condition raised by anything executed in the previous iteration.
        SET loop_done = FALSE;
        FETCH curs INTO table_name_;
        -- Check for end-of-cursor BEFORE doing any work: when FETCH finds no
        -- row, table_name_ still holds the previous table (or NULL if the list
        -- is empty) and must not be processed.
        IF loop_done THEN
            LEAVE table_loop;
        END IF;
        CALL dofat_delete_orphans_from_table(table_name_);
        -- ROW_COUNT() after the CALL reflects the last statement the procedure
        -- executed (its DELETE). GREATEST() guards against a negative value,
        -- which MySQL reports when the last statement did not modify rows.
        SET rows_deleted = rows_deleted + GREATEST(ROW_COUNT(), 0);
    END LOOP table_loop;
    CLOSE curs;
END//
CREATE PROCEDURE delete_orphans_from_all_tables ()
BEGIN
    -- Entry point. Records which tables are foreign-key targets, then keeps
    -- running full deletion passes over them until a pass reports that it
    -- deleted nothing. The per-pass count is held in the user variable
    -- @rows_deleted.
    CALL dofat_store_tables_targeted_by_foreign_keys();
    sweep: LOOP
        CALL dofat_delete_orphans_from_all_tables_iter(@rows_deleted);
        IF @rows_deleted = 0 THEN
            LEAVE sweep;
        END IF;
    END LOOP sweep;
END//
delimiter ;
顺便说一下,这次练习让我认识到,有几件事使得用 MySQL 存储过程编写这种复杂度的代码令人沮丧。我把它们都列出来,只是因为它们或许能帮助你(或将来某位好奇的读者)理解上面代码中那些看似疯狂的风格选择。
PREPARE
/ EXECUTE
组合来使用动态SQL。)PREPARE stmt FROM CONCAT( ... );
是语法错误,而@foo = CONCAT( ... ); PREPARE stmt FROM @foo;
则不是。EXECUTE stmt USING @foo
很好,但EXECUTE stmt USING foo
其中foo
是一个过程变量是语法错误。SELECT
语句和一个最后一个语句是select语句的过程都返回一个结果集,但几乎所有你想用结果集做的事情(比如循环它或检查是否有什么东西)是IN
it)只能定位到SELECT
语句,而不是CALL
语句。NULL
,但不会引发警告或错误。
缺乏在存储过程之间干净地传递结果集的能力
结果集是SQL中的基本类型;它们是SELECT
返回的内容,当您从应用程序层使用SQL时,您会将它们视为对象。但是在MySQL sproc中,你不能将它们分配给变量或将它们从一个sproc传递到另一个sproc。如果您确实需要此功能,则必须让一个sproc将结果集写入临时表,以便另一个sproc可以读取它。
SET foo = bar
,SELECT foo = bar
和SELECT bar INTO foo
。EXECUTE
)都不会接受任何其他类型的变量。尽管有这些障碍,如果你有决心,你仍然可以将这样的小程序与sprocs拼凑在一起。
答案 2 :(得分:0)
由于我遇到了一些奇怪的 SQL 语法错误,这里给出另一个解决方案:它沿用被采纳答案中的 SQL,但改用 Groovy 来驱动。
使用 orphanedNodeStatistics() 可以得到每个表中将被删除的节点数;dumpOrphanedNodes(String tableName) 会输出将被删除节点的主键;deleteOrphanedNodes(String tableName) 则真正删除它们。
要删除全部孤立节点,请遍历 tablesTargetedByForeignKeys() 返回的集合,并对其中每个表执行删除。
import groovy.sql.Sql
/**
 * Finds, reports and deletes "orphaned" rows: rows of a table that is the
 * target of foreign keys but which are not referenced by any row of any
 * referencing table.
 *
 * All metadata comes from INFORMATION_SCHEMA.key_column_usage and the generated
 * predicates use MySQL's null-safe equality operator, so this targets MySQL.
 */
class OrphanNodesTool {
    /** Database connection used for every query issued by this tool. */
    Sql sql;
    /** Name of the schema whose foreign-key targets are inspected. */
    String schema;

    /**
     * Returns the sorted, de-duplicated names of all tables in {@link #schema}
     * that are referenced by at least one foreign key.
     */
    Set<String> tablesTargetedByForeignKeys() {
        def query = '''\
            SELECT referenced_table_name
              FROM INFORMATION_SCHEMA.key_column_usage
             WHERE referenced_table_schema = ?
        '''
        // TreeSet both sorts the names and removes the duplicates that appear
        // when a table is referenced by more than one foreign-key column.
        def result = new TreeSet()
        sql.eachRow( query, [ schema ] ) { row ->
            result << row[0]
        }
        return result
    }

    /**
     * Builds the WHERE-clause text selecting the rows of {@code tableName}
     * that no referencing table points at: one
     * {@code NOT (t.col IN (SELECT fk FROM child) <=> 1)} condition per
     * referencing foreign-key column, joined with AND.
     *
     * The {@code NOT (x IN y <=> 1)} form is used instead of {@code x NOT IN y}
     * so that a NULL result of the IN test counts as "not referenced".
     *
     * Returns an empty string when nothing references the table.
     */
    String conditionsToFindOrphans( String tableName ) {
        List<String> conditions = []
        def query = '''\
            SELECT referenced_column_name, column_name, table_name
              FROM INFORMATION_SCHEMA.key_column_usage
             WHERE referenced_table_schema = ?
               AND referenced_table_name = ?
        '''
        sql.eachRow( query, [ schema, tableName ] ) { row ->
            conditions << "NOT (${tableName}.${row.referenced_column_name} IN (SELECT ${row.column_name} FROM ${row.table_name}) <=> 1)"
        }
        return conditions.join( '\nAND ' )
    }

    /**
     * Returns the key values of all orphaned rows of {@code tableName}.
     *
     * NOTE(review): the key column is assumed to be named
     * {@code <tableName>_ID}; tables following another naming scheme will make
     * this query fail.
     */
    List<Long> listOrphanedNodes( String tableName ) {
        // toString() turns the GString into a plain String so the interpolated
        // identifiers are sent as SQL text; groovy.sql would otherwise bind
        // every ${...} expression as a statement parameter.
        def query = """\
            SELECT ${tableName}.${tableName}_ID
              FROM ${tableName}
             WHERE ${conditionsToFindOrphans(tableName)}
        """.toString()
        def result = []
        sql.eachRow( query ) { row ->
            result << row[0]
        }
        return result
    }

    /**
     * Prints the number of orphaned rows of {@code tableName} followed by
     * their key values: one per line when there are fewer than 10, otherwise
     * in rows of 20 values.
     */
    void dumpOrphanedNodes( String tableName ) {
        def pks = listOrphanedNodes( tableName )
        println( String.format( "%8d %s", pks.size(), tableName ) )
        if( pks.size() < 10 ) {
            pks.each {
                println( String.format( "%16d", it as long ) )
            }
        } else {
            // collate() only splits the list into chunks; it has no overload
            // taking a closure, so the chunks are iterated with each().
            pks.collate( 20 ).each { chunk ->
                chunk.each {
                    print( String.format( "%16d ", it as long ) )
                }
                println()
            }
        }
    }

    /** Returns how many rows of {@code tableName} are orphaned. */
    int countOrphanedNodes( String tableName ) {
        def query = """\
            SELECT COUNT(*)
              FROM ${tableName}
             WHERE ${conditionsToFindOrphans(tableName)}
        """.toString()
        int result = 0
        sql.eachRow( query ) { row ->
            result = row[0]
        }
        return result
    }

    /**
     * Deletes all orphaned rows of {@code tableName} and returns the number of
     * rows removed.
     */
    int deleteOrphanedNodes( String tableName ) {
        def query = """\
            DELETE
              FROM ${tableName}
             WHERE ${conditionsToFindOrphans(tableName)}
        """.toString()
        // executeUpdate() returns the affected-row count; execute() returns a
        // boolean ("is the first result a ResultSet?") and cannot be used as
        // the int result.
        return sql.executeUpdate( query )
    }

    /**
     * Prints, for every table targeted by a foreign key, the number of
     * orphaned rows it currently holds.
     */
    void orphanedNodeStatistics() {
        def tableNames = tablesTargetedByForeignKeys()
        for( String tableName : tableNames ) {
            int n = countOrphanedNodes( tableName )
            println( String.format( "%8d %s", n, tableName ) )
        }
    }
}
(gist)