我的问题很难用标题说清楚,所以我直接给出数据和目标。有一张MySQL表,结构如下:
-- Table customerProjectData.
-- No PRIMARY KEY or UNIQUE constraint is declared here, so nothing in this
-- definition prevents several rows from sharing the same
-- (idCustomer, idProject) pair.
CREATE TABLE customerProjectData (
    idCustomer   INT           NOT NULL,
    idProject    INT           DEFAULT NULL,
    comePersons  SMALLINT      DEFAULT NULL,
    comePairs    SMALLINT      DEFAULT NULL,
    comment      VARCHAR(255)  DEFAULT NULL,
    idCity       INT           DEFAULT NULL,
    idStreet     INT           DEFAULT NULL,
    name         VARCHAR(64)   DEFAULT NULL,
    surname      VARCHAR(64)   DEFAULT NULL,
    homeNum      VARCHAR(10)   DEFAULT NULL,
    postCode     CHAR(6)       DEFAULT NULL,
    postCity     VARCHAR(64)   DEFAULT NULL,
    cellPhone    VARCHAR(12)   DEFAULT NULL
)
问题是,这张表本应定义PRIMARY KEY(idCustomer, idProject)
,但实际上并没有定义。结果表中出现了一些重复行(主键相同),但各行的数据并不相同。
我可以运行ALTER IGNORE TABLE
,但这样造成的数据丢失很可能是不可接受的,而且无法预测会丢掉哪些数据。最后,我们决定先尝试用重复行中的值填充为空的字段(如果重复行中有数据),然后再运行ALTER IGNORE TABLE
。这样丢失的数据会少得多,在本例中是可以接受的(从长远来看,这比维持现状要好)。
问题是如何用每组重复行中的数据来填充这些字段。
答案 0 :(得分:1)
这是一个粗略的尝试。
首先尝试找出具有相同键的行的数量。
<?php
// Collect every (idCustomer, idProject) pair that occurs more than once in
// customerProjectData, together with the number of rows sharing it (`num`).
// Result: $rows is an array of associative arrays with the keys
// idCustomer, idProject and num, ordered by num ascending.
//
// $link is the database identifier
// NOTE(review): the mysql_* extension is deprecated and was removed in PHP 7;
// it is kept here because $link is created outside this snippet. Port to
// mysqli/PDO together with the connection code.

// Only the grouping columns and the aggregate are selected. MySQL rejects an
// unqualified `*` that follows other select-list items, and non-grouped
// columns are not valid per group under ONLY_FULL_GROUP_BY anyway.
$sql = 'SELECT `idCustomer`, `idProject`, COUNT(*) AS `num` '
     . 'FROM `customerProjectData` '
     . 'GROUP BY `idCustomer`, `idProject` '
     . 'HAVING COUNT(*) > 1 '
     . 'ORDER BY `num` ASC';
$run = mysql_query( $sql, $link );
$rows = array();
// mysql_query() returns false on failure; an empty result simply yields no
// loop iterations, so no separate row-count check is needed.
if( $run ) {
    while( ( $fetch = mysql_fetch_assoc( $run ) ) !== false ) {
        $rows[] = $fetch;
    }
}
?>
现在$rows
中每个重复的键对应一条记录,其中num
表示该键在表中重复出现的次数。
您可以编写一个函数,对每个键迭代num
次,找出哪些行包含完整数据,并用该行的数据填充其他行。
这需要一些反复试验。
答案 1 :(得分:0)
我采纳了@web-nomad的建议,做了类似的事情,不过是用SQL存储过程实现的:
-- correctCPD: for every (idCustomer, idProject) pair that occurs more than
-- once in customerProjectData, copy non-empty values from each duplicate row
-- into the empty (NULL / '' / 0) fields of all rows sharing that pair.
-- No rows are deleted; removing the duplicates afterwards is a separate step.
--
-- Takes no parameters and returns no result set.
-- NOTE(review): this script terminates statements with $$, so the client
-- delimiter must be set to $$ before running it (e.g. DELIMITER $$ in the
-- mysql command-line client).
DROP PROCEDURE IF EXISTS correctCPD$$
CREATE PROCEDURE correctCPD()
BEGIN
    -- Key of the duplicate group currently being processed.
    DECLARE currentCustomerId INT;
    DECLARE currentProjectId INT;

    -- Column values of the duplicate row currently being read.
    DECLARE cur_idCustomer INT;
    DECLARE cur_idProject INT;
    DECLARE cur_comePersons SMALLINT;
    DECLARE cur_comePairs SMALLINT;
    DECLARE cur_comment VARCHAR(255);
    DECLARE cur_idCity INT;
    DECLARE cur_idStreet INT;
    DECLARE cur_name VARCHAR(64);
    DECLARE cur_surname VARCHAR(64);
    DECLARE cur_homeNum VARCHAR(10);
    DECLARE cur_postCode CHAR(6);
    DECLARE cur_postCity VARCHAR(64);
    DECLARE cur_cellPhone VARCHAR(12);

    -- A previous run that failed midway may have left the temporary table
    -- behind in this session; drop it so the CREATE below cannot fail.
    DROP TEMPORARY TABLE IF EXISTS ids;
    CREATE TEMPORARY TABLE ids (
        idCustomer INT,
        idProject INT
    ) ENGINE = InnoDB;

    -- Snapshot of all keys that appear on more than one row.
    INSERT INTO ids (idCustomer, idProject)
    SELECT idCustomer, idProject
    FROM customerProjectData
    GROUP BY idCustomer, idProject
    HAVING COUNT(*) > 1;

    BLOCK1: BEGIN
        DECLARE done INT DEFAULT FALSE;
        DECLARE itemCur CURSOR FOR SELECT idCustomer, idProject FROM ids;
        DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = TRUE;

        OPEN itemCur;
        itemCurLoop: LOOP
            FETCH itemCur INTO currentCustomerId, currentProjectId;
            IF done THEN
                LEAVE itemCurLoop;
            END IF;

            BLOCK2: BEGIN
                DECLARE doneIn INT DEFAULT FALSE;
                -- idProject is nullable and GROUP BY puts NULLs into one
                -- group, so the NULL-safe <=> is needed to find those rows.
                DECLARE cpdCur CURSOR FOR
                    SELECT idCustomer, idProject, comePersons, comePairs,
                           comment, idCity, idStreet, name, surname,
                           homeNum, postCode, postCity, cellPhone
                    FROM customerProjectData
                    WHERE idCustomer = currentCustomerId
                      AND idProject <=> currentProjectId;
                DECLARE CONTINUE HANDLER FOR NOT FOUND SET doneIn = TRUE;

                OPEN cpdCur;
                cpdCurLoop: LOOP
                    FETCH cpdCur INTO
                        cur_idCustomer, cur_idProject, cur_comePersons, cur_comePairs,
                        cur_comment, cur_idCity, cur_idStreet, cur_name, cur_surname,
                        cur_homeNum, cur_postCode, cur_postCity, cur_cellPhone;
                    IF doneIn THEN
                        LEAVE cpdCurLoop;
                    END IF;

                    -- Fill each empty field of every row in the group with
                    -- the fetched row's value, when that value is non-empty.
                    -- Numeric columns: NULL or 0 counts as empty.
                    -- Text columns: NULL or '' counts as empty. The test
                    -- `cur_x <> ''` is also false for NULL, so it covers both.
                    -- (Comparing text with `> 0` would coerce it to a number
                    -- and skip ordinary text such as names.)
                    -- idCity must stay before idStreet: MySQL evaluates
                    -- single-table assignments left to right, so the idStreet
                    -- test sees the idCity value assigned just above it.
                    UPDATE customerProjectData SET
                        comePersons = IF((comePersons IS NULL OR comePersons = 0) AND cur_comePersons > 0, cur_comePersons, comePersons),
                        comePairs = IF((comePairs IS NULL OR comePairs = 0) AND cur_comePairs > 0, cur_comePairs, comePairs),
                        comment = IF((comment IS NULL OR comment = '') AND cur_comment <> '', cur_comment, comment),
                        idCity = IF((idCity IS NULL AND idStreet IS NULL) AND cur_idCity > 0, cur_idCity, idCity),
                        idStreet = IF(((idCity IS NULL OR idCity = cur_idCity) AND idStreet IS NULL) AND cur_idStreet > 0, cur_idStreet, idStreet),
                        name = IF((name IS NULL OR name = '') AND cur_name <> '', cur_name, name),
                        surname = IF((surname IS NULL OR surname = '') AND cur_surname <> '', cur_surname, surname),
                        homeNum = IF((homeNum IS NULL OR homeNum = '') AND cur_homeNum <> '', cur_homeNum, homeNum),
                        postCode = IF((postCode IS NULL OR postCode = '') AND cur_postCode <> '', cur_postCode, postCode),
                        postCity = IF((postCity IS NULL OR postCity = '') AND cur_postCity <> '', cur_postCity, postCity),
                        cellPhone = IF((cellPhone IS NULL OR cellPhone = '') AND cur_cellPhone <> '', cur_cellPhone, cellPhone)
                    WHERE idCustomer = currentCustomerId
                      AND idProject <=> currentProjectId;
                END LOOP;
                CLOSE cpdCur;
            END BLOCK2;
        END LOOP;
        CLOSE itemCur;
    END BLOCK1;

    DROP TEMPORARY TABLE ids;
END$$
感谢您的帮助!