SQL Technical Questions

SQL interviews can be tricky! Here are some thought-provoking questions that tested my knowledge: 1️⃣ How do you calculate the median salary in a department using SQL? 2️⃣ What’s the difference between correlated and non-correlated subqueries? 3️⃣ How can you update data in one table based on values from another? 4️⃣ What are window functions and how are they different from GROUP BY? 5️⃣ How do you find gaps in a sequence (like missing invoice numbers)? 6️⃣ How would you delete every nth row from a table? 7️⃣ How can you transpose columns into rows (unpivot) in SQL? 8️⃣ What’s the purpose of the COALESCE function, and how is it different from ISNULL or NVL? 9️⃣ How do you calculate the difference in days, months, or years between two dates? 🔟 How do you write a SQL query to show cumulative percentage of sales per region? 1️⃣1️⃣ How do you rank items within partitions based on a custom sorting logic? 1️⃣2️⃣ What’s the difference between EXISTS and IN? 1️⃣3️⃣ How can you detect and prevent deadlocks in SQL queries? 1️⃣4️⃣ How would you handle slowly changing dimensions in SQL? 1️⃣5️⃣ How do you detect circular references in a hierarchical table structure? 1️⃣6️⃣ What’s the difference between a clustered and non-clustered index? 1️⃣7️⃣ How do you audit changes (INSERT/UPDATE/DELETE) in a table using SQL? 1️⃣8️⃣ What is a recursive CTE, and where would you use it? 1️⃣9️⃣ How do you filter the top 3 products by revenue within each category? 2️⃣0️⃣ What strategies can you use to reduce the execution time of complex joins?


*******************************************************************************


- Sample Data Setup for Most Queries

-- Employees Table

CREATE TABLE Employees (

    EmpID INT PRIMARY KEY,

    Name VARCHAR(100),

    Department VARCHAR(50),

    Salary INT,

    HireDate DATE

);


INSERT INTO Employees VALUES

(1, 'Alice', 'HR', 60000, '2020-01-10'),

(2, 'Bob', 'HR', 55000, '2019-03-15'),

(3, 'Charlie', 'IT', 80000, '2021-06-20'),

(4, 'Diana', 'IT', 95000, '2018-09-12'),

(5, 'Eve', 'Finance', 72000, '2022-02-01');


-- Sales Table

CREATE TABLE Sales (

    Region VARCHAR(50),

    Product VARCHAR(50),

    SalesAmount INT

);


INSERT INTO Sales VALUES

('North', 'A', 100),

('North', 'B', 200),

('North', 'C', 700),

('South', 'A', 150),

('South', 'B', 100),

('South', 'C', 750);


-- Invoice Table

CREATE TABLE Invoices (

    InvoiceID INT PRIMARY KEY

);


INSERT INTO Invoices VALUES

(1001), (1002), (1004), (1005), (1007);


-- Question-by-Question Breakdown


-- 1. Median salary in a department

SELECT Department, 

       PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY Salary) 

       OVER (PARTITION BY Department) AS MedianSalary

FROM Employees;


-- 2. Correlated vs Non-Correlated Subqueries

-- Correlated:

SELECT Name FROM Employees e

WHERE Salary > (SELECT AVG(Salary) FROM Employees WHERE Department = e.Department);

-- Non-Correlated:

SELECT Name FROM Employees

WHERE Salary > (SELECT AVG(Salary) FROM Employees);


-- 3. Update based on another table

-- Assume we have a Bonus Table

CREATE TABLE Bonus (

    EmpID INT,

    BonusAmount INT

);

INSERT INTO Bonus VALUES (1, 5000), (2, 3000);


UPDATE Employees

SET Salary = Salary + b.BonusAmount

FROM Employees e

JOIN Bonus b ON e.EmpID = b.EmpID;


-- 4. Window functions vs GROUP BY

-- Window:

SELECT Name, Department, Salary,

       AVG(Salary) OVER (PARTITION BY Department) AS AvgDeptSalary

FROM Employees;

-- GROUP BY:

SELECT Department, AVG(Salary) AS AvgDeptSalary

FROM Employees

GROUP BY Department;


-- 5. Gaps in sequences

SELECT InvoiceID + 1 AS Missing

FROM Invoices i

WHERE NOT EXISTS (SELECT 1 FROM Invoices WHERE InvoiceID = i.InvoiceID + 1);


-- 6. Delete every nth row (e.g., every 2nd row)

WITH Ranked AS (

    SELECT *, ROW_NUMBER() OVER (ORDER BY EmpID) AS rn

    FROM Employees

)

DELETE FROM Employees

WHERE EmpID IN (SELECT EmpID FROM Ranked WHERE rn % 2 = 0);


-- 7. Unpivot

-- Use VALUES clause or UNION

SELECT EmpID, 'Name' AS Attribute, Name AS Value FROM Employees

UNION ALL

SELECT EmpID, 'Department', Department FROM Employees;


-- 8. COALESCE vs ISNULL

-- COALESCE returns first non-null in list (ANSI), ISNULL is T-SQL

SELECT Name, COALESCE(NULL, 'Default') AS Coalesced,

              ISNULL(NULL, 'Default') AS IsNullVersion;


-- 9. Date difference

SELECT DATEDIFF(DAY, '2020-01-01', '2021-01-01') AS DayDiff,

       DATEDIFF(MONTH, '2020-01-01', '2021-01-01') AS MonthDiff,

       DATEDIFF(YEAR, '2020-01-01', '2021-01-01') AS YearDiff;


-- 10. Cumulative % of sales per region

WITH RegionalSales AS (

  SELECT Region, SUM(SalesAmount) AS Total

  FROM Sales

  GROUP BY Region

), RankedSales AS (

  SELECT s.Region, s.Product, s.SalesAmount,

         SUM(SalesAmount) OVER (PARTITION BY s.Region ORDER BY SalesAmount DESC) AS CumSum,

         rs.Total

  FROM Sales s

  JOIN RegionalSales rs ON s.Region = rs.Region

)

SELECT *, 100.0 * CumSum / Total AS CumPercent

FROM RankedSales;


-- 11. Rank within partition

SELECT Name, Department, Salary,

       RANK() OVER (PARTITION BY Department ORDER BY Salary DESC) AS DeptRank

FROM Employees;


-- 12. EXISTS vs IN

-- EXISTS is faster with correlated subqueries

SELECT Name FROM Employees e

WHERE EXISTS (SELECT 1 FROM Bonus b WHERE b.EmpID = e.EmpID);


SELECT Name FROM Employees

WHERE EmpID IN (SELECT EmpID FROM Bonus);


-- 13. Deadlock prevention

-- Strategies:

-- 1. Access tables in same order

-- 2. Keep transactions short

-- 3. Use low isolation levels where safe (e.g., READ COMMITTED)

-- 4. Avoid user interaction inside transactions


-- 14. Slowly Changing Dimensions (SCD Type 2)

-- Add effective date, expiry date, current flag to dimension tables

-- INSERT new row instead of updating


-- 15. Circular reference detection

-- Use Recursive CTE with loop detection

WITH Hierarchy AS (

    SELECT EmpID, ManagerID, CAST(EmpID AS VARCHAR(MAX)) AS Path

    FROM Org

    UNION ALL

    SELECT o.EmpID, o.ManagerID, h.Path + '->' + CAST(o.EmpID AS VARCHAR)

    FROM Org o

    JOIN Hierarchy h ON o.ManagerID = h.EmpID

    WHERE h.Path NOT LIKE '%->' + CAST(o.EmpID AS VARCHAR) + '%'

)

SELECT * FROM Hierarchy;


-- 16. Clustered vs Non-Clustered Index

-- Clustered: Data is physically ordered

-- Non-Clustered: Separate from actual table, contains pointer

-- One clustered index per table, many non-clustered allowed


-- 17. Auditing changes

-- Use Triggers

CREATE TRIGGER trg_EmployeeAudit

ON Employees

AFTER INSERT, UPDATE, DELETE

AS

BEGIN

    -- Logic to log into an audit table

END;


-- 18. Recursive CTE example

WITH Numbers AS (

  SELECT 1 AS Num

  UNION ALL

  SELECT Num + 1 FROM Numbers WHERE Num < 10

)

SELECT * FROM Numbers;


-- 19. Top 3 products by revenue per category

WITH Ranked AS (

  SELECT *, RANK() OVER (PARTITION BY Region ORDER BY SalesAmount DESC) AS rnk

  FROM Sales

)

SELECT * FROM Ranked WHERE rnk <= 3;


-- 20. Optimizing complex joins

-- Strategies:

-- 1. Index join keys

-- 2. Filter early with WHERE

-- 3. Avoid SELECT *

-- 4. Break into temp tables or CTEs

-- 5. Use EXISTS instead of IN for large subqueries

Comments

Popular posts from this blog

SyBase Database Migration to SQL Server

Basics of US Healthcare -Medical Billing