目录
查询所有重复的列:这里给到MySQL5.7 和 8.0版本的查询方式
在开发过程中,因为某些问题可能会导致同一条数据在表中重复出现,此时我们需要申请权限走SQL去修复,下面介绍下具体修复流程
测试表
DROP TABLE IF EXISTS `test_table`;
CREATE TABLE `test_table` (
`id` int NOT NULL AUTO_INCREMENT,
`a_id` int NULL DEFAULT NULL,
`a_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL,
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 1 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Records of test_table
-- ----------------------------
INSERT INTO `test_table` VALUES (1, 1000, '1000名字');
INSERT INTO `test_table` VALUES (2, 1000, '1000名字');
INSERT INTO `test_table` VALUES (3, 1000, '1000名字');
INSERT INTO `test_table` VALUES (4, 2000, '2000名字');
这里可以看到a_id列有2条重复的数据,我们仅保留最新的一条数据,下面我们开始介绍
方式一:分组查询出每组最大的ID,其余的删除
# 备份表
create table test_table202408207 SELECT * from test_table;
# 执行清理
DELETE a
FROM
test_table a
WHERE
NOT EXISTS (
SELECT
1
FROM
( SELECT MAX( id ) AS max_id FROM test_table GROUP BY a_id ) AS b
WHERE
a.id = b.max_id
);
方式二:先标记重复待清理的数据,检查后清理
# 备份表
create table test_table_202408207 SELECT * from test_table;
# 给原表添加一个辅助列,比如叫`to_delete`
ALTER TABLE test_table ADD COLUMN to_delete BOOLEAN NOT NULL DEFAULT FALSE;
# 使用自连接和分组来标记重复记录
UPDATE test_table a
JOIN (
SELECT
a_id,
MAX(id) AS max_id
FROM
test_table
GROUP BY
a_id
HAVING COUNT(*) > 1
) b ON a.a_id = b.a_id
SET a.to_delete = TRUE
WHERE a.id <> b.max_id;
# 删除被标记为删除的记录
DELETE FROM test_table WHERE to_delete = TRUE;
# 删除辅助列
ALTER TABLE test_table DROP COLUMN to_delete;
附言
查询所有重复的列:这里给到MySQL5.7 和 8.0版本的查询方式
# 5.7版本查询方式(包含所有重复行)
# 若数据量量稍大一些,这里的查询将会非常的慢
SELECT
a.*
FROM
test_table a
JOIN ( SELECT a_id FROM test_table GROUP BY a_id HAVING COUNT(*) > 1 ) b ON a.a_id = b.a_id;
# 8.0版本查询方式(不包含重复的最新行)
SELECT
*
FROM
( SELECT a.*, ROW_NUMBER() OVER ( PARTITION BY a_id ORDER BY id DESC ) AS rn FROM test_table a ) AS subquery
WHERE
subquery.rn > 1;
标签:删除,查询,MySQL,test,table,重复记录,id,SELECT,1000
From: https://blog.csdn.net/weixin_44700876/article/details/140990656