oracle去重(面试)
现有Oracle中数据如下:
第一种方法:
使用多层嵌套查询,这种方法会多次扫描表
1.通过emp_no进行分组,查询出每个分组中最大的ID
select max(id) from employee t group by t.emp_no having count(1) > 1;
2.删除重复记录,但这样只能删除具有2条的重复记录。如果具有2条以上的重复记录,则无法删除
delete from employee t where t.id in (select max(t1.id) from employee t1 group by t1.emp_no having count(1) > 1)
优化如下:
①获取有重复记录的员工记录
select t1.* from employee t1 where t1.emp_no in (select t.emp_no from employee t group by t.emp_no having count(1) > 1)
②从重复记录的员工号中获取最小的id
select min(t2.id) from (select t1.* from employee t1 where t1.emp_no in (select t.emp_no from employee t group by t.emp_no having count(1) > 1)) t2 group by t2.emp_no
③将①和②结合查询出多余的重复记录
select t1.* from employee t1 where t1.emp_no in (select t.emp_no from employee t group by t.emp_no having count(1) > 1)
and t1.id not in (
select min(t2.id) from (select t1.* from employee t1 where t1.emp_no in (select t.emp_no from employee t group by t.emp_no having count(1) > 1)) t2 group by t2.emp_no
)
④删除重复记录
delete from employee t3 where t3.id in (
select t1.* from employee t1 where t1.emp_no in (select t.emp_no from employee t group by t.emp_no having count(1) > 1)
and not in (
select min(t2.id) from (select t1.* from employee t1 where t1.emp_no in (select t.emp_no from employee t group by t.emp_no having count(1) > 1)) t2 group by t2.emp_no
)
)
第二种方法(推荐):
使用row_number() over()
1.先通过emp_no进行分组,然后通过sal进行排序,得到序列seq
select t.* , row_number() over(partition by t.emp_no order by t.sal desc) seq from employee t
2.获取每个分组中序列seq大于1记录的id
select t1.id from (select t.*, row_number() over(partition by t.emp_no order by t.sal desc) seq from employee t) t1 where t1.seq > 1
3.删除重复记录
delete from employee t2 where t2.id in (select t1.id from (select t.*, row_number() over(partition by t.emp_no order by t.sal desc) seq from employee t) t1 where t1.seq > 1)