考虑到历史记录,我想要导入人员数据。
我编写了几个单独的 SQL 步骤,但遇到了两个问题:
1:其中一个步骤得到了一个意外的日期
2:我想避免手动逐个执行这些步骤,而是改用存储过程
涉及的表如下:
保存历史记录的目标表:
-- History table filled by the import steps: one row per id and validity
-- period, bounded by effective_dt and expiry_dt.
-- All columns are nullable; a nullable column without an explicit default
-- defaults to NULL.
CREATE TABLE person (
    id           INT         NULL,
    name         VARCHAR(50) NULL,
    effective_dt DATE        NULL,
    expiry_dt    DATE        NULL
);
包含要导入的人员数据的表格
-- Staging table holding the exported person rows that still have to be
-- imported into person. import_flag starts at 0 and is set to 1 by the
-- import steps once the row has been loaded.
CREATE TABLE person_stg (
    id          INT         NULL,
    name        VARCHAR(50) NULL,
    export_dt   DATE        NULL,
    import_flag TINYINT     DEFAULT 0
);
-- Sample exports to be imported (several exports per id, on different dates).
-- import_flag is omitted so every row starts with its default of 0.
INSERT INTO person_stg (id, name, export_dt)
VALUES
    (1, 'Jonn',   '2000-01-01'),
    (2, 'Marry',  '2000-01-01'),
    (1, 'John',   '2000-01-05'),
    (2, 'Marry',  '2000-01-06'),
    (2, 'Mary',   '2000-01-10'),
    (3, 'Samuel', '2000-01-10'),
    (2, 'Maria',  '2000-01-15');
以下第一步(1)使用该人的第一个状态填充表 person :
-- (1) Seed person with the earliest export of every id.
-- A staging row qualifies when no other staging row with the same id has an
-- earlier export_dt; it is inserted as an open version (expiry_dt 9999-12-31).
INSERT INTO person (id, name, effective_dt, expiry_dt)
SELECT
    stg.id,
    stg.name,
    stg.export_dt,
    '9999-12-31' AS expiry_dt
FROM person_stg AS stg
WHERE NOT EXISTS (
    SELECT *
    FROM person_stg AS earlier
    WHERE earlier.id = stg.id
      AND earlier.export_dt < stg.export_dt
);

-- Show the result of step (1).
SELECT id, name, effective_dt, expiry_dt
FROM person
ORDER BY id, effective_dt;
+----+--------+--------------+------------+
| id | name | effective_dt | expiry_dt |
+----+--------+--------------+------------+
| 1 | Jonn | 2000-01-01 | 9999-12-31 |
| 2 | Marry | 2000-01-01 | 9999-12-31 |
| 3 | Samuel | 2000-01-10 | 9999-12-31 |
+----+--------+--------------+------------+
步骤(2)更改到期日期:
-- (2) Update expiry_dt where changes happened
-- Targets the open version of a person (expiry_dt = '9999-12-31') and sets
-- its expiry_dt to the day before the export_dt of a staging row that has
-- the same id but a different name.
UPDATE
person a
, person_stg b
SET a.expiry_dt = SUBDATE(b.export_dt,1)
WHERE a.id = b.id
AND a.name <> b.name
AND a.expiry_dt = '9999-12-31'
-- NOTE(review): MIN() below is applied to b.export_dt (the outer staging
-- alias), not to c.export_dt, so this subquery does not appear to pick the
-- earliest not-yet-imported export of the id. There is also no predicate
-- requiring b.export_dt to be later than a.effective_dt. Presumably this lets
-- an older staging row with a different name match when the step is re-run,
-- which would explain an expiry_dt such as 1999-12-31 -- TODO confirm.
AND b.export_dt = (SELECT MIN(b.export_dt)
FROM person_stg c
WHERE b.id = c.id
AND c.import_flag = 0
)
;
-- Show the result of step (2).
SELECT * FROM person ORDER BY id, effective_dt;
+----+--------+--------------+------------+
| id | name | effective_dt | expiry_dt |
+----+--------+--------------+------------+
| 1 | Jonn | 2000-01-01 | 2000-01-04 |
| 2 | Marry | 2000-01-01 | 2000-01-09 |
| 3 | Samuel | 2000-01-10 | 9999-12-31 |
+----+--------+--------------+------------+
第三步(3)插入人物数据的第二个状态:
-- (3) Insert new exports which have changes.
-- A not-yet-imported staging row becomes the new open version when person
-- already holds a version of the same id that ends exactly one day before the
-- staging row's export_dt (i.e. the version closed by step (2)).
INSERT INTO person (id, name, effective_dt, expiry_dt)
SELECT
    stg.id,
    stg.name,
    stg.export_dt,
    '9999-12-31' AS expiry_dt
FROM person_stg AS stg
INNER JOIN person AS closed
    ON closed.id = stg.id
WHERE closed.expiry_dt = SUBDATE(stg.export_dt, 1)
  AND stg.export_dt > closed.effective_dt
  AND stg.import_flag = 0;

-- Show the result of step (3).
SELECT id, name, effective_dt, expiry_dt
FROM person
ORDER BY id, effective_dt;
+----+--------+--------------+------------+
| id | name | effective_dt | expiry_dt |
+----+--------+--------------+------------+
| 1 | Jonn | 2000-01-01 | 2000-01-04 |
| 1 | John | 2000-01-05 | 9999-12-31 |
| 2 | Marry | 2000-01-01 | 2000-01-09 |
| 2 | Mary | 2000-01-10 | 9999-12-31 |
| 3 | Samuel | 2000-01-10 | 9999-12-31 |
+----+--------+--------------+------------+
最后一步(4)在 person_stg 中标记哪些记录已经被导入:
-- (4) Flag imported records.
-- A staging row counts as imported when person contains a version of the same
-- id whose effective_dt equals the staging row's export_dt.
UPDATE person_stg AS stg
INNER JOIN person AS per
    ON per.id = stg.id
   AND per.effective_dt = stg.export_dt
SET stg.import_flag = 1;
到目前为止一切正常。但如果我重复执行步骤(2),就会得到下表:
+----+--------+--------------+------------+
| id | name | effective_dt | expiry_dt |
+----+--------+--------------+------------+
| 1 | Jonn | 2000-01-01 | 2000-01-04 |
| 1 | John | 2000-01-05 | 9999-12-31 |
| 2 | Marry | 2000-01-01 | 2000-01-09 |
| 2 | Mary | 2000-01-10 | 1999-12-31 | <--- ??? Should be 2000-01-14
| 3 | Samuel | 2000-01-10 | 9999-12-31 |
+----+--------+--------------+------------+
Mary / 2000-01-10 这条记录的 expiry_dt 变成了 1999-12-31,而不是 2000-01-14。我不明白这是怎么回事。所以,我的问题是:
(1a)为什么这个有效期的更新给出了这个奇怪的日期?
(1b)步骤(2)是否有更好的写法?
(2)如何自动重复执行步骤(2)到(4)?我只需要一些关于存储过程的提示。
-- (4) Define imported records
UPDATE person_stg a, person b SET import_flag = 1 WHERE a.id = b.id AND a.export_dt = b.effective_dt;
答案 0 :(得分:1)
如果我没有理解错你的意图,你并不需要一个多步骤的过程。你只是在为每条记录寻找"结束日期"。下面是一种使用相关子查询的方法:
-- For every staging row, derive its validity period in a single query:
-- effdate is the row's own export_dt; enddate is the day before the next
-- later export of the same id, or '9999-12-31' when there is no later export.
SELECT
    p.*,
    p.export_dt AS effdate,
    COALESCE(
        (
            -- Earliest later export of the same id, minus one day;
            -- yields NULL when no later export exists.
            SELECT MIN(nxt.export_dt) - INTERVAL 1 DAY
            FROM person_stg AS nxt
            WHERE nxt.id = p.id
              AND nxt.export_dt > p.export_dt
        ),
        '9999-12-31'
    ) AS enddate
FROM person_stg AS p;
你也可以使用变量做点什么。
我不确定这是否能回答您的问题,因为它会用更简单的查询替换整个过程。
答案 1 :(得分:0)
我找到了一个使用游标的解决方案,这是我以前从未用过的。首先,我创建了一个存储过程(SP)sp_add_record,它针对 person_stg 中给定的 id 和 export_dt,更新到期日期、插入新的状态,或者插入新的元素。然后通过游标在另一个存储过程(curs_add_records)中调用这个 SP:
使用此存储过程:
CALL curs_add_records();
-- Show the loaded history table (no ORDER BY, so row order is not guaranteed).
SELECT id, name, effective_dt, expiry_dt
FROM person;
+----+--------+--------------+------------+
| id | name | effective_dt | expiry_dt |
+----+--------+--------------+------------+
| 1 | Jonn | 2000-01-01 | 2000-01-04 |
| 2 | Marry | 2000-01-01 | 2000-01-09 |
| 1 | John | 2000-01-05 | 9999-12-31 |
| 2 | Mary | 2000-01-10 | 2000-01-14 |
| 3 | Samuel | 2000-01-10 | 9999-12-31 |
| 2 | Maria | 2000-01-15 | 9999-12-31 |
+----+--------+--------------+------------+
这个存储过程的优点是:无论是初始加载(全量加载)还是增量加载,我都可以用同一套代码来加载该表。
我参考的文献:
Djoni Darmawikarta:《Dimensional Data Warehousing with MySQL》(数据仓库方面的问题)
Ben Forta:MariaDB速成课程(SP问题)
以下是我使用的SP。
PS:回答我自己的问题是否合适?
DELIMITER //
DROP PROCEDURE IF EXISTS sp_add_record //
-- sp_add_record: apply one staging row (identified by id and export date)
-- to the history table person.
--
-- Parameters:
--   p_id        id of the person_stg row to process
--   p_export_dt export_dt of the person_stg row to process
--
-- Effects, in order:
--   1. If the open version of the id (expiry_dt = '9999-12-31') has a
--      different name, it is closed on the day before p_export_dt.
--   2. If a version was closed on that day and no open version remains,
--      the staging row is inserted as the new open version.
--   3. If the id has no open version at all, the staging row is inserted
--      as its open version.
-- A staging row whose name equals the open version changes nothing.
CREATE PROCEDURE sp_add_record(
    IN p_id INTEGER
  , IN p_export_dt DATE
)
BEGIN
    -- 1. Change expiry_dt: close the open version when the name changed.
    UPDATE person AS p
    INNER JOIN person_stg AS s
        ON s.id = p.id
    SET p.expiry_dt = SUBDATE(p_export_dt, 1)
    WHERE p.id = p_id
      AND s.export_dt = p_export_dt
      AND p.effective_dt <= p_export_dt
      AND p.name <> s.name
      AND p.expiry_dt = '9999-12-31'
    ;

    -- 2. Add new status: insert the staging row after the version that ends
    --    the day before p_export_dt. The expiry filter is applied directly to
    --    p so that only that one version drives the insert.
    INSERT INTO person (id, name, effective_dt, expiry_dt)
    SELECT s.id, s.name, s.export_dt, '9999-12-31' AS expiry_dt
    FROM person AS p
    INNER JOIN person_stg AS s
        ON s.id = p.id
    WHERE p.id = p_id
      AND s.export_dt = p_export_dt
      AND p.name <> s.name
      AND p.expiry_dt = SUBDATE(p_export_dt, 1)
      -- an open version must not exist yet, otherwise nothing was closed
      AND NOT EXISTS (SELECT *
                      FROM person AS p_open
                      WHERE p_open.id = p.id
                        AND p_open.expiry_dt = '9999-12-31'
                     )
    ;

    -- 3. Add new id: insert the staging row when the id has no open version
    --    in person (statement 2 has already covered the name-change case).
    INSERT INTO person (id, name, effective_dt, expiry_dt)
    SELECT s.id, s.name, s.export_dt, '9999-12-31' AS expiry_dt
    FROM person_stg AS s
    WHERE s.export_dt = p_export_dt
      AND s.id = p_id
      AND NOT EXISTS (SELECT *
                      FROM person AS p_open
                      WHERE p_open.id = s.id
                        AND p_open.expiry_dt = '9999-12-31'
                     )
    ;
END
//
DELIMITER ;
DELIMITER //
DROP PROCEDURE IF EXISTS curs_add_records //
-- curs_add_records: walk through all rows of person_stg in chronological
-- order (export_dt, then id) and apply each one to person by calling
-- sp_add_record(id, export_dt).
CREATE PROCEDURE curs_add_records()
BEGIN
    -- Local variables
    DECLARE done BOOLEAN DEFAULT FALSE;
    DECLARE v_id INTEGER;
    DECLARE v_export_dt DATE;

    -- Cursor over the staging rows; the ordering matters because each call
    -- builds on the versions created by the earlier exports.
    DECLARE c1 CURSOR FOR
        SELECT id, export_dt
        FROM person_stg
        ORDER BY export_dt, id
    ;

    -- Set the flag when FETCH runs past the last row.
    DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = TRUE;

    OPEN c1;

    fetch_loop: LOOP
        FETCH c1 INTO v_id, v_export_dt;

        -- Leave immediately after a failed fetch: the variables still hold
        -- the previous row (or NULL for an empty table), so calling
        -- sp_add_record here would process a row twice.
        IF done THEN
            LEAVE fetch_loop;
        END IF;

        CALL sp_add_record(v_id, v_export_dt);
    END LOOP fetch_loop;

    CLOSE c1;
END
//
DELIMITER ;