Nov 29, 2014

Delete duplicate records from table in sql server


Here I am creating a duplicate scenario for the employee table, use below query..
CREATE TABLE TEMP_EMPLOYEE
(
empId INT IDENTITY(1,1),
FName VARCHAR(100),
LName VARCHAR(100),
Salary DECIMAL(18,2),
Country VARCHAR(100)
)
INSERT INTO TEMP_EMPLOYEE(FName,LName,Salary,Country)
VALUES ('Dilip','Singh',20000,'IND')
INSERT INTO TEMP_EMPLOYEE(FName,LName,Salary,Country)
VALUES ('Raj','Kumar',30000,'USA')
INSERT INTO TEMP_EMPLOYEE(FName,LName,Salary,Country)
VALUES ('Anil','Singh',25000,'AUS')
INSERT INTO TEMP_EMPLOYEE(FName,LName,Salary,Country)
VALUES ('Dilip','Singh',20000,'IND')
INSERT INTO TEMP_EMPLOYEE(FName,LName,Salary,Country)
VALUES ('Dilip','Singh',20000,'IND')
SELECT * FROM TEMP_EMPLOYEE

--Before the deleting duplicate records you must check it is exist in your table or not
--We can check duplicate records exist in table using bellow query

SELECT FName,LName,Salary,Country FROM TEMP_EMPLOYEE
GROUP BY FName,LName,Salary,Country HAVING COUNT(*)>1

-- To remove Duplicate records from table you can use below query

DELETE FROM TEMP_EMPLOYEE WHERE empId NOT IN ( SELECT MAX(empId) FROM TEMP_EMPLOYEE GROUP BY FName,LName,Salary,Country )
--**Use above query, when you want keep latest record

--Otherwise you can use below query

DELETE FROM TEMP_EMPLOYEE WHERE empId NOT IN ( SELECT MIN(empId) FROM TEMP_EMPLOYEE GROUP BY FName,LName,Salary,Country )

-- you can remove duplicate records by using Common Table Expression query given below

;WITH CTE AS (
SELECT FName,LName,Salary,Country,ROW_NUMBER() OVER (PARTITION BY FName,LName,Salary,Country ORDER BY FName,LName,Salary,Country) AS [Rank] FROM TEMP_EMPLOYEE
)
DELETE FROM CTE WHERE [Rank]>1

SELECT * FROM TEMP_EMPLOYEE
DROP TABLE TEMP_EMPLOYEE