Question

先说明一下，我对数据库设计不是很有经验。我有一张存放哈希及其id的表。添加一组新哈希时，组中的每一行都会获得相同的id。如果新组中的任何哈希已存在于数据库中，则新组和现有组中的所有哈希都会获得一个新的共享id（也就是在出现重复哈希时把这些id合并）：

-- Upsert the incoming group of hashes: every new row carries the same
-- $new_id. A hash that is already stored is not inserted again; the
-- ON DUPLICATE KEY branch only bumps its repeat_count, so that existing row
-- still holds its previous id after this statement.
-- ($new_id and ... are placeholders filled in by the application.)
INSERT INTO hashes 
    (id, hash) 
VALUES 
    ($new_id, ...), ($new_id, ...)
ON DUPLICATE KEY UPDATE 
    repeat_count = repeat_count + 1;

-- Collect every distinct id currently attached to one of the incoming hashes.
-- NOTE(review): hashes_lookup is not defined in this excerpt; presumably a
-- single-column scratch table -- confirm.
INSERT INTO hashes_lookup SELECT DISTINCT id FROM hashes WHERE hash IN (...);
-- Re-point every hashes row that carries one of the collected ids to $new_id,
-- which merges the old groups with the new one.
UPDATE hashes JOIN hashes_lookup USING (id) SET id = '$new_id';
-- Empty the scratch table for the next run.
TRUNCATE TABLE hashes_lookup;

其他表引用这些id，因此如果id发生更改，则外键约束会负责更新表中的id。但是，这里的问题是我无法在任何子表中强制实现唯一性。如果我这样做，我的查询将失败：

表'...'的外键约束，记录'...'会导致表'...'中的重复条目

以下面的测试用例为例（其中id和value构成复合唯一键），这个错误是合理的：

id | value
---+-------
a  | 1
b  | 2
c  | 1

然后a变为c：

id | value
---+-------
c  | 1
b  | 2
c  | 1

但c,1已经存在。

如果有类似ON UPDATE IGNORE CASCADE的选项就最理想了：这样当级联更新会产生重复行时，重复的行会被直接忽略。但是，我很确定这里真正的问题是我的数据库设计，所以我对所有建议都持开放态度。我目前的解决方案是不在子表上强制唯一性，但这会导致大量冗余行。

编辑：

-- Question's original schema: one row per hash. `id` is the group id shared
-- by all hashes of a group; `repeat_count` is the counter incremented by the
-- ON DUPLICATE KEY UPDATE upsert shown earlier in the question.
CREATE TABLE `hashes` (
 `hash` char(64) NOT NULL,
 `id` varchar(128) NOT NULL,
 `repeat_count` int(11) NOT NULL DEFAULT '0',
 `insert_timestamp` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
 `update_timestamp` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
 -- Each hash may be stored only once.
 UNIQUE KEY `hash` (`hash`) USING BTREE,
 -- Non-unique index: many hashes share one group id. This is the column the
 -- `emails` foreign key points at.
 KEY `id` (`id`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=latin1

-- Question's original child table: e-mail addresses attached to a hash group
-- through `id`.
CREATE TABLE `emails` (
 `id` varchar(128) NOT NULL,
 `group_id` char(5) NOT NULL,
 `email` varchar(500) NOT NULL,
 KEY `index` (`id`) USING BTREE,
 -- Uniqueness is enforced on (id, group_id, first 255 characters of email).
 -- This is the key that the cascaded id update collides with.
 UNIQUE KEY `id` (`id`,`group_id`,`email`(255)) USING BTREE,
 -- References hashes.id, which is only a non-unique KEY in `hashes`.
 -- ON UPDATE CASCADE rewrites `id` here whenever hashes.id changes.
 CONSTRAINT `emails_ibfk_1` FOREIGN KEY (`id`) REFERENCES `hashes` (`id`) ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=latin1

Answer 1

我认为创建表hash_group来存储哈希组的id是很好的：

-- One row per hash group. `id` is the surrogate key referenced by the
-- `hashes` and `emails` tables; `group_name` must be unique.
CREATE TABLE `hash_group` (
    `id`               BIGINT       NOT NULL AUTO_INCREMENT,
    `group_name`       VARCHAR(128) NOT NULL,
    `insert_timestamp` TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    `update_timestamp` TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP
                                    ON UPDATE CURRENT_TIMESTAMP,
    PRIMARY KEY (`id`) USING BTREE,
    UNIQUE KEY `group_name` (`group_name`) USING BTREE
) ENGINE = InnoDB DEFAULT CHARSET = latin1;

并改变现有表格的结构：

-- One row per hash. Each hash belongs to exactly one group through
-- `hash_group_id`; `repeat_count` is incremented by the upsert whenever an
-- already-stored hash is submitted again.
CREATE TABLE `hashes` (
    `hash`             CHAR(64)  NOT NULL,
    `hash_group_id`    BIGINT    NOT NULL,
    `repeat_count`     INT(11)   NOT NULL DEFAULT '0',
    `insert_timestamp` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    `update_timestamp` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
                                 ON UPDATE CURRENT_TIMESTAMP,
    UNIQUE KEY `hash` (`hash`) USING BTREE,
    KEY `hashes_hash_group_id_index` (`hash_group_id`) USING BTREE,
    -- Deleting or renumbering a group propagates to its hashes.
    CONSTRAINT `hashes_hash_group_id_fk`
        FOREIGN KEY (`hash_group_id`)
        REFERENCES `hash_group` (`id`)
        ON DELETE CASCADE
        ON UPDATE CASCADE
) ENGINE = InnoDB DEFAULT CHARSET = latin1;

-- E-mail addresses attached to a hash group.
CREATE TABLE `emails` (
 `hash_group_id` BIGINT NOT NULL,
 `group_id` char(5) NOT NULL,
 `email` varchar(500) NOT NULL,
 -- No separate single-column index on `hash_group_id`: it is the leftmost
 -- column of `emails_unique`, so that key already serves lookups by group
 -- and backs the foreign key below. A second index would only add write cost.
 -- Uniqueness is enforced on (hash_group_id, group_id, first 255 characters
 -- of email).
 UNIQUE KEY `emails_unique` (`hash_group_id`,`group_id`,`email`(255)) USING BTREE,
 -- Deleting or renumbering a group propagates to its e-mail rows.
 CONSTRAINT `emails_ibfk_1` FOREIGN KEY (`hash_group_id`) REFERENCES `hash_group` (`id`) ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=latin1;

如果需要，还可以创建更新哈希组的触发器：

DELIMITER $$
-- After any update of a `hashes` row, give the group that row now belongs to
-- a fresh `group_name`.
CREATE TRIGGER `update_hash_group_name` AFTER UPDATE ON `hashes`
FOR EACH ROW
BEGIN
    -- MD5(UUID()) rather than MD5(NOW()): NOW() only has one-second
    -- resolution, so two different groups touched within the same second
    -- would receive the same name and violate UNIQUE KEY `group_name`,
    -- failing the statement that fired this trigger.
    UPDATE `hash_group`
    SET `group_name` = MD5(UUID()) -- placeholder: replace with your own hash formula
    WHERE `id` = NEW.hash_group_id;
END$$
DELIMITER ;

创建获取实际组ID的函数：

DROP FUNCTION IF EXISTS get_hash_group;

DELIMITER $$
-- Returns the hash group id to use for a batch of hashes.
--   id: an existing hash_group.id, or NULL when no group exists yet.
-- When id is NULL a new `hash_group` row is created and its generated id is
-- returned; otherwise id is returned unchanged.
-- The parameter and return type are BIGINT to match hash_group.id; with INT,
-- ids beyond the INT range would not survive the round trip.
CREATE FUNCTION get_hash_group(id BIGINT) RETURNS BIGINT
    NOT DETERMINISTIC
    MODIFIES SQL DATA
BEGIN
  IF id IS NULL THEN
    -- MD5(UUID()) rather than MD5(NOW()): two groups created within the same
    -- second would otherwise collide on UNIQUE KEY `group_name`.
    INSERT INTO `hash_group` (`group_name`)
    VALUES (MD5(UUID())); -- placeholder: replace with your own hash formula
    RETURN LAST_INSERT_ID();
  END IF;

  RETURN id;
END$$
DELIMITER ;

情景：

初始填写：

-- Fixture: three groups.
INSERT INTO `hash_group`
    (`id`, `group_name`)
VALUES
    (1, 'test1'),
    (2, 'test2'),
    (3, 'test3');

-- Fixture: group 1 owns three hashes, groups 2 and 3 own one hash each.
INSERT INTO `hashes`
    (`hash`, `hash_group_id`)
VALUES
    ('hash11', 1),
    ('hash12', 1),
    ('hash13', 1),
    ('hash2',  2),
    ('hash3',  3);

-- Fixture: one e-mail row per group; groups 2 and 3 share the same address.
INSERT INTO `emails`
    (`hash_group_id`, `group_id`, `email`)
VALUES
    (1, 'g1', 'example1@'),
    (2, 'g1', 'example2@'),
    (3, 'g1', 'example2@');

更新hash_group方案：

-- Merge scenario: insert a batch of hashes and, if some of them already
-- belong to different groups, glue those groups into the one with the
-- smallest id.

-- Scratch list of the group ids that will be merged away. It is created
-- before the transaction starts, and it is needed because once the hashes
-- have been moved the list can no longer be derived from `hashes`.
DROP TEMPORARY TABLE IF EXISTS merged_hash_group;
CREATE TEMPORARY TABLE merged_hash_group (
    id BIGINT NOT NULL,
    PRIMARY KEY (id)
);

START TRANSACTION;

-- @min_group_id: the smallest group id among the incoming hashes. This group
-- survives. It is NULL when none of the incoming hashes is stored yet.
SELECT MIN(hash_group_id) INTO @min_group_id
FROM hashes
WHERE hash IN ('hash11', 'hash12', 'hash2', 'hash15');

-- Remember every other group touched by the incoming hashes. The comparison
-- yields no rows when @min_group_id is NULL, so nothing is merged then.
INSERT INTO merged_hash_group (id)
SELECT DISTINCT hash_group_id
FROM hashes
WHERE hash IN ('hash11', 'hash12', 'hash2', 'hash15')
  AND hash_group_id > @min_group_id;

-- Move the e-mails of the merged groups to the surviving group.
-- IGNORE skips rows that would duplicate an existing
-- (hash_group_id, group_id, email) entry instead of failing the statement;
-- the skipped rows are removed by the cascading DELETE below.
UPDATE IGNORE `emails`
SET `hash_group_id` = @min_group_id
WHERE `hash_group_id` IN (SELECT id FROM merged_hash_group);

-- Move ALL hashes of the merged groups to the surviving group before the
-- groups are deleted. Otherwise ON DELETE CASCADE on `hashes` would delete
-- them: hashes not in the incoming list would be lost, and the ones in the
-- list would be re-inserted with repeat_count reset to 0.
UPDATE `hashes`
SET `hash_group_id` = @min_group_id
WHERE `hash_group_id` IN (SELECT id FROM merged_hash_group);

-- Delete the now-empty merged groups (cascades to any leftover duplicate
-- e-mail rows).
DELETE FROM `hash_group`
WHERE `id` IN (SELECT id FROM merged_hash_group);

-- @group_id = surviving group id, or a newly created group when
-- @min_group_id is NULL (all incoming hashes are new).
SELECT get_hash_group(@min_group_id) INTO @group_id;

-- Insert the new hashes; hashes that are already stored only get their
-- counter bumped.
INSERT INTO `hashes` (hash, hash_group_id) VALUES
('hash11', @group_id),
('hash12', @group_id),
('hash2', @group_id),
('hash15', @group_id)
ON DUPLICATE KEY
UPDATE repeat_count = repeat_count + 1;

COMMIT;

DROP TEMPORARY TABLE IF EXISTS merged_hash_group;

Answer 2

我可能错了，但我认为你对hashes中的id字段命名不当。

我认为您应该将hashes中的id字段重命名为group_id，然后新增一个名为id的AUTO_INCREMENT字段，并将其作为hashes的主键，emails中的id则引用此字段。如果您想更新所有哈希并将它们关联在一起，只需更新group_id字段而不是id，这样id在整个表中始终保持唯一。

这样您可以避免级联问题，也可以始终知道电子邮件所指向的原始哈希值。当然，如果你想获取与某个电子邮件相关的所有哈希值（旧的和新的），就必须再执行一次额外的查询，但我认为这可以解决你所有的问题。

编辑：
您可以使用触发器来执行此操作

触发器就像这样

DELIMITER $$
-- Propagates a changed hashes.id to the `emails` rows that carry the old id.
-- This replaces the foreign key's ON UPDATE CASCADE.
CREATE TRIGGER `update_hash_id` AFTER UPDATE ON `hashes`
FOR EACH ROW
BEGIN
    -- Only touch `emails` when the id really changed. The trigger also fires
    -- for updates that leave id alone (for example a repeat_count bump), and
    -- those must not cause a write against `emails`.
    IF NOT (NEW.id <=> OLD.id) THEN
        UPDATE `emails` SET `id` = NEW.id WHERE `id` = OLD.id;
    END IF;
END$$
DELIMITER ;

并且您也必须删除外键关系。

Answer 3

为每个子表添加一个额外的整数列可以通过将其用作主键来完全避免此问题。密钥永远不会改变，因为它不是对其他任何东西的引用。

使用复合键作为主键通常是应当避免的。考虑到这个键组合并不总是唯一的，我认为你肯定需要在所有子表中都设置一个专用的主键来解决这个问题。

您甚至可以自动递增它，这样您就不会每次都手动分配它。例如..

-- Example child table with a dedicated surrogate primary key.
CREATE TABLE exampleTable
(
    -- Auto-assigned key; it refers to nothing else, so it never has to change.
    trueID int NOT NULL AUTO_INCREMENT,
    col1 int NOT NULL,
    col2 varChar(50),
    PRIMARY KEY (trueID)
);

然后，当子表中的两个行设置为相同的值（无论出于何种原因）时，主键保持唯一，从而防止可能出现的数据库中的任何冲突。

Answer 4

这是我们在聊天中讨论后得出的解决方案：

/* Tables */

-- Parent table first: `hashes` and `emails` both declare a foreign key to
-- `entities`, so it has to exist before they are created.
-- Maps each inserted group (`group_id`) to the entity it currently belongs
-- to (`entity_id`). Merging rewrites `entity_id` only; `group_id` never changes.
CREATE TABLE `entities` (
 `group_id` bigint(20) NOT NULL,
 `entity_id` bigint(20) NOT NULL,
 PRIMARY KEY (`group_id`),
 KEY `entity_id` (`entity_id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;

-- One row per hash, owned by the group that first inserted it.
CREATE TABLE `hashes` (
 `group_id` bigint(20) NOT NULL,
 `hash` varchar(128) NOT NULL,
 `repeat_count` int(11) NOT NULL DEFAULT '0',
 UNIQUE KEY `hash` (`hash`),
 KEY `group_id` (`group_id`),
 CONSTRAINT `hashes_ibfk_1` FOREIGN KEY (`group_id`) REFERENCES `entities` (`group_id`) ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=latin1;

-- E-mail addresses per group; unique within a group.
CREATE TABLE `emails` (
 `group_id` bigint(20) NOT NULL,
 `email` varchar(500) NOT NULL,
 UNIQUE KEY `group_id` (`group_id`,`email`) USING BTREE,
 CONSTRAINT `emails_ibfk_1` FOREIGN KEY (`group_id`) REFERENCES `entities` (`group_id`) ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=latin1;

-- Scratch table holding the entity ids that are being merged by the insert
-- script.
-- NOTE(review): this table is MyISAM, so its rows are presumably not covered
-- by the surrounding transaction, and the HASH index type is presumably not
-- honoured by this engine -- confirm against the MySQL documentation.
CREATE TABLE `entity_lookup` (
 `entity_id` bigint(20) NOT NULL,
 PRIMARY KEY (`entity_id`) USING HASH
) ENGINE=MyISAM DEFAULT CHARSET=latin1;

/* Inserting */

START TRANSACTION;

/* Determine next group ID. COALESCE covers the very first insert, when
   `entities` is empty and MAX() returns NULL. */
SET @next_group_id = (SELECT COALESCE(MAX(group_id), 0) + 1 FROM entities);

/* Determine next entity ID (same empty-table handling). */
SET @next_entity_id = (SELECT COALESCE(MAX(entity_id), 0) + 1 FROM entities);

/* Merge any entity ids: collect the entities that already own one of the
   incoming hashes, then move all of their groups to the new entity id. */
INSERT IGNORE INTO entity_lookup (entity_id)
SELECT e.entity_id
FROM entities AS e
INNER JOIN hashes AS h
    ON h.group_id = e.group_id
WHERE h.hash IN(...);

UPDATE entities AS e
INNER JOIN entity_lookup AS l
    ON l.entity_id = e.entity_id
SET e.entity_id = @next_entity_id;

/* Empty the scratch table. This is intentionally an unfiltered DELETE and
   not TRUNCATE TABLE: TRUNCATE causes an implicit commit, which would end
   the transaction here and leave the statements below outside of it. */
DELETE FROM entity_lookup;

/* Add the new group ID to entity_id */
INSERT INTO entities (group_id, entity_id) VALUES (@next_group_id, @next_entity_id);

/* Add new values into hashes; a hash that is already stored only gets its
   counter bumped and stays with its original group. */
INSERT INTO hashes (group_id, hash) VALUES
    (@next_group_id, ...)
ON DUPLICATE KEY UPDATE
  repeat_count = repeat_count + 1;

/* Add other new values */
INSERT IGNORE INTO emails (group_id, email) VALUES
    (@next_group_id, "email1");

COMMIT;

在外键更新时忽略级联？

4 个答案: