我有一个基于 Activiti engine 5.22 版的应用程序。目前,它在给定时间段内产生的历史数据,比历史清理过程在同一时间段内能够删除的还要多。有什么办法可以改善这种情况吗?
答案 0 :(得分:2)
Activiti 框架会在以下各表中创建历史数据:
ACT_HI_PROCINST
ACT_HI_ACTINST
ACT_HI_TASKINST
ACT_GE_BYTEARRAY
ACT_HI_VARINST
ACT_HI_DETAIL
ACT_HI_COMMENT
ACT_HI_ATTACHMENT
ACT_HI_IDENTITYLINK
默认清理过程先获取 ACT_HI_PROCINST 记录,然后尝试删除这些记录,以及其他历史表中与给定 ACT_HI_PROCINST 记录相关联的数据。删除过程既不使用批处理,也不使用批量删除,因此速度非常慢。
加快清理过程的一种方法是执行存储过程,如下所示:
-- Purges finished Activiti history in batches (SQL Server / T-SQL).
--
-- A "root" is an ACT_HI_PROCINST row with no SUPER_PROCESS_INSTANCE_ID_ whose
-- END_TIME_ is set and is <= @BeforeStartTimestamp. For every batch of roots
-- the procedure collects the roots plus all of their sub-process instances,
-- then deletes the matching rows from the ACT_HI_* tables and the
-- ACT_GE_BYTEARRAY rows they reference, inside a single transaction.
--
-- Parameters:
--   @BeforeStartTimestamp  upper bound (inclusive) for END_TIME_ of the roots.
--   @BatchSize             number of root process instances handled per
--                          transaction (passed to TOP).
--   @DeletedRowCount       OUTPUT; total rows deleted across all tables by the
--                          batches that committed.
--
-- Errors: if a batch fails, its transaction is rolled back and the error is
-- re-raised to the caller. Batches committed earlier stay deleted. The failure
-- is NOT swallowed: retrying the same batch in the loop would select the very
-- same roots again and could spin forever on a persistent error.
CREATE PROCEDURE usp_DeleteActivityHistory(
    @BeforeStartTimestamp DATETIME,
    @BatchSize INT,
    @DeletedRowCount INT OUTPUT
)
AS
BEGIN
    -- Working sets: roots of the current batch, the roots plus their
    -- sub-process descendants, and the task instances that belong to them.
    DROP TABLE IF EXISTS #ROOT_PROC_INST_ID_TABLE;
    CREATE TABLE #ROOT_PROC_INST_ID_TABLE (PROC_INST_ID_ NVARCHAR(64));
    DROP TABLE IF EXISTS #PROC_INST_ID_TABLE;
    CREATE TABLE #PROC_INST_ID_TABLE (PROC_INST_ID_ NVARCHAR(64));
    DROP TABLE IF EXISTS #TASK_INST_ID_TABLE;
    CREATE TABLE #TASK_INST_ID_TABLE (ID_ NVARCHAR(64));

    SET @DeletedRowCount = 0;
    DECLARE @DeletedBatchRowCount INT;

    WHILE 1 = 1
    BEGIN
        TRUNCATE TABLE #ROOT_PROC_INST_ID_TABLE;
        TRUNCATE TABLE #PROC_INST_ID_TABLE;
        TRUNCATE TABLE #TASK_INST_ID_TABLE;
        SET @DeletedBatchRowCount = 0;

        -- Pick the next batch of finished root process instances.
        INSERT INTO #ROOT_PROC_INST_ID_TABLE (PROC_INST_ID_)
        SELECT TOP (@BatchSize) PROC_INST_ID_
        FROM ACT_HI_PROCINST
        WHERE END_TIME_ <= @BeforeStartTimestamp
          AND END_TIME_ IS NOT NULL
          AND SUPER_PROCESS_INSTANCE_ID_ IS NULL;

        -- Nothing left to purge: leave the loop.
        IF @@ROWCOUNT = 0
            BREAK;

        -- Expand the roots to the full sub-process hierarchy.
        WITH ACT_HI_PROCINST_HIERARCHY(PROC_INST_ID_)
        AS (
            SELECT PROC_INST_ID_
            FROM #ROOT_PROC_INST_ID_TABLE
            UNION ALL
            SELECT ACT_HI_PROCINST.PROC_INST_ID_
            FROM ACT_HI_PROCINST
            INNER JOIN ACT_HI_PROCINST_HIERARCHY
                ON ACT_HI_PROCINST_HIERARCHY.PROC_INST_ID_ = ACT_HI_PROCINST.SUPER_PROCESS_INSTANCE_ID_
        )
        INSERT INTO #PROC_INST_ID_TABLE (PROC_INST_ID_)
        SELECT PROC_INST_ID_
        FROM ACT_HI_PROCINST_HIERARCHY;

        BEGIN TRY
            BEGIN TRANSACTION;

            -- Process-level rows. Byte arrays are removed before the rows
            -- that reference them, while the reference can still be read.
            DELETE FROM ACT_GE_BYTEARRAY
            WHERE ID_ IN (
                SELECT BYTEARRAY_ID_ FROM ACT_HI_DETAIL
                WHERE PROC_INST_ID_ IN (SELECT PROC_INST_ID_ FROM #PROC_INST_ID_TABLE)
            );
            SET @DeletedBatchRowCount += @@ROWCOUNT;

            DELETE FROM ACT_HI_DETAIL
            WHERE PROC_INST_ID_ IN (SELECT PROC_INST_ID_ FROM #PROC_INST_ID_TABLE);
            SET @DeletedBatchRowCount += @@ROWCOUNT;

            DELETE FROM ACT_GE_BYTEARRAY
            WHERE ID_ IN (
                SELECT BYTEARRAY_ID_ FROM ACT_HI_VARINST
                WHERE PROC_INST_ID_ IN (SELECT PROC_INST_ID_ FROM #PROC_INST_ID_TABLE)
            );
            SET @DeletedBatchRowCount += @@ROWCOUNT;

            DELETE FROM ACT_HI_VARINST
            WHERE PROC_INST_ID_ IN (SELECT PROC_INST_ID_ FROM #PROC_INST_ID_TABLE);
            SET @DeletedBatchRowCount += @@ROWCOUNT;

            DELETE FROM ACT_HI_ACTINST
            WHERE PROC_INST_ID_ IN (SELECT PROC_INST_ID_ FROM #PROC_INST_ID_TABLE);
            SET @DeletedBatchRowCount += @@ROWCOUNT;

            -- Task-level rows: collect the tasks of these process instances
            -- together with their sub-tasks (via PARENT_TASK_ID_), then delete
            -- the associated ACT_HI_DETAIL, ACT_HI_VARINST, ACT_HI_COMMENT,
            -- ACT_HI_ATTACHMENT and ACT_HI_IDENTITYLINK rows and the tasks.
            WITH ACT_HI_TASKINST_HIERARCHY(ID_)
            AS (
                SELECT ID_
                FROM ACT_HI_TASKINST
                WHERE PROC_INST_ID_ IN (SELECT PROC_INST_ID_ FROM #PROC_INST_ID_TABLE)
                UNION ALL
                SELECT ACT_HI_TASKINST.ID_
                FROM ACT_HI_TASKINST
                INNER JOIN ACT_HI_TASKINST_HIERARCHY
                    ON ACT_HI_TASKINST_HIERARCHY.ID_ = ACT_HI_TASKINST.PARENT_TASK_ID_
            )
            INSERT INTO #TASK_INST_ID_TABLE (ID_)
            SELECT ID_
            FROM ACT_HI_TASKINST_HIERARCHY;

            DELETE FROM ACT_GE_BYTEARRAY
            WHERE ID_ IN (
                SELECT BYTEARRAY_ID_ FROM ACT_HI_DETAIL
                WHERE TASK_ID_ IN (SELECT ID_ FROM #TASK_INST_ID_TABLE)
            );
            SET @DeletedBatchRowCount += @@ROWCOUNT;

            DELETE FROM ACT_HI_DETAIL
            WHERE TASK_ID_ IN (SELECT ID_ FROM #TASK_INST_ID_TABLE);
            SET @DeletedBatchRowCount += @@ROWCOUNT;

            DELETE FROM ACT_GE_BYTEARRAY
            WHERE ID_ IN (
                SELECT BYTEARRAY_ID_ FROM ACT_HI_VARINST
                WHERE TASK_ID_ IN (SELECT ID_ FROM #TASK_INST_ID_TABLE)
            );
            SET @DeletedBatchRowCount += @@ROWCOUNT;

            DELETE FROM ACT_HI_VARINST
            WHERE TASK_ID_ IN (SELECT ID_ FROM #TASK_INST_ID_TABLE);
            SET @DeletedBatchRowCount += @@ROWCOUNT;

            DELETE FROM ACT_HI_COMMENT
            WHERE TASK_ID_ IN (SELECT ID_ FROM #TASK_INST_ID_TABLE);
            SET @DeletedBatchRowCount += @@ROWCOUNT;

            DELETE FROM ACT_GE_BYTEARRAY
            WHERE ID_ IN (
                SELECT CONTENT_ID_ FROM ACT_HI_ATTACHMENT
                WHERE TASK_ID_ IN (SELECT ID_ FROM #TASK_INST_ID_TABLE)
            );
            SET @DeletedBatchRowCount += @@ROWCOUNT;

            DELETE FROM ACT_HI_ATTACHMENT
            WHERE TASK_ID_ IN (SELECT ID_ FROM #TASK_INST_ID_TABLE);
            SET @DeletedBatchRowCount += @@ROWCOUNT;

            DELETE FROM ACT_HI_IDENTITYLINK
            WHERE TASK_ID_ IN (SELECT ID_ FROM #TASK_INST_ID_TABLE);
            SET @DeletedBatchRowCount += @@ROWCOUNT;

            DELETE FROM ACT_HI_TASKINST
            WHERE ID_ IN (SELECT ID_ FROM #TASK_INST_ID_TABLE);
            SET @DeletedBatchRowCount += @@ROWCOUNT;

            -- Remaining process-level rows, and finally the process instances.
            DELETE FROM ACT_HI_IDENTITYLINK
            WHERE PROC_INST_ID_ IN (SELECT PROC_INST_ID_ FROM #PROC_INST_ID_TABLE);
            SET @DeletedBatchRowCount += @@ROWCOUNT;

            DELETE FROM ACT_HI_COMMENT
            WHERE PROC_INST_ID_ IN (SELECT PROC_INST_ID_ FROM #PROC_INST_ID_TABLE);
            SET @DeletedBatchRowCount += @@ROWCOUNT;

            DELETE FROM ACT_HI_PROCINST
            WHERE PROC_INST_ID_ IN (SELECT PROC_INST_ID_ FROM #PROC_INST_ID_TABLE);
            SET @DeletedBatchRowCount += @@ROWCOUNT;

            COMMIT TRANSACTION;

            -- Only committed batches count towards the reported total.
            SET @DeletedRowCount += @DeletedBatchRowCount;
        END TRY
        BEGIN CATCH
            -- Undo the partial batch whether the transaction is still
            -- committable (1) or doomed (-1), so a batch is all-or-nothing.
            IF XACT_STATE() <> 0
                ROLLBACK TRANSACTION;

            -- Surface the original error instead of looping on the same rows.
            THROW;
        END CATCH;
    END;

    DROP TABLE IF EXISTS #ROOT_PROC_INST_ID_TABLE;
    DROP TABLE IF EXISTS #PROC_INST_ID_TABLE;
    DROP TABLE IF EXISTS #TASK_INST_ID_TABLE;
END
这个存储过程有以下优点:
您可以使用 JDBC 执行此存储过程,如下所示:
/**
 * Runs the {@code usp_DeleteActivityHistory} stored procedure and logs how
 * many rows it removed and how long the call took.
 *
 * @param connection         JDBC connection the procedure is executed on
 * @param olderThanTimestamp value bound to the procedure's first parameter
 * @param batchSize          value bound to the procedure's second parameter
 * @return the row count reported through the procedure's third (OUT)
 *         parameter, or {@code 0} when the call fails with an
 *         {@link SQLException} (the failure is logged, not rethrown)
 */
public int deleteActivityHistoryBeforeDate(
        Connection connection,
        Timestamp olderThanTimestamp,
        int batchSize) {
    long startNanos = System.nanoTime();
    try (CallableStatement sp = connection.prepareCall(
            "{ call usp_DeleteActivityHistory(?, ?, ?) }")) {
        sp.setTimestamp(1, olderThanTimestamp);
        sp.setInt(2, batchSize);
        // Address the OUT parameter by ordinal, like the two inputs: mixing
        // ordinal and named parameters on one statement is not portable
        // across JDBC drivers.
        sp.registerOutParameter(3, Types.INTEGER);
        sp.execute();
        int rowCount = sp.getInt(3);
        LOGGER.info(
            "Deleted {} records in {} milliseconds",
            rowCount,
            TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos)
        );
        return rowCount;
    } catch (SQLException e) {
        LOGGER.error("The usp_DeleteActivityHistory execution failed", e);
        return 0;
    }
}
connection 参数是用来执行该存储过程的 JDBC Connection。
olderThanTimestamp 是一个 Timestamp 对象,它指定 ACT_HI_PROCINST 表中 END_TIME_ 列的最大时间戳值:END_TIME_ 不晚于该值的历史记录都会被删除。
batchSize 参数表示在一次事务中删除多少条根 ACT_HI_PROCINST 记录。一个批次执行完后,过程会继续处理下一个批次,直到数据库中不再存在早于 olderThanTimestamp 值的根 ACT_HI_PROCINST 行。
返回值是使用所提供的参数执行存储过程时,从所有表中删除的记录总数。
如果您想了解该存储过程在实践中是如何工作的,请查看 GitHub 上的这个测试用例。