Advanced SQL Features

Advanced Oracle SQL features and techniques

Analytical Functions

Comprehensive window functions with detailed explanations and use cases (Access: SELECT privilege required)

-- WINDOW FUNCTIONS - COMPREHENSIVE EXPLANATION
-- ================================================
-- Window functions perform calculations across a set of table rows related to the current row
-- They don't group rows like GROUP BY, but instead provide access to multiple rows from the current row

-- 1. ROW_NUMBER() - Assigns unique sequential numbers to rows within a partition
--    Syntax: ROW_NUMBER() OVER (PARTITION BY column ORDER BY column)
--    Use case: Ranking employees within each department by salary
SELECT 
    employee_id,
    first_name,
    last_name,
    salary,
    department_id,
    -- ROW_NUMBER assigns 1,2,3... for each department, ordered by salary DESC
    ROW_NUMBER() OVER (PARTITION BY department_id ORDER BY salary DESC) AS dept_rank
FROM employees
ORDER BY department_id, salary DESC;

-- 2. RANK() vs DENSE_RANK() - Different ranking methods
--    RANK: Leaves gaps in ranking (1,2,2,4,5...)
--    DENSE_RANK: No gaps in ranking (1,2,2,3,4...)
SELECT 
    employee_id,
    first_name,
    last_name,
    salary,
    -- RANK: If two employees have same salary, both get rank 2, next gets rank 4
    RANK() OVER (ORDER BY salary DESC) AS overall_rank,
    -- DENSE_RANK: If two employees have same salary, both get rank 2, next gets rank 3
    DENSE_RANK() OVER (ORDER BY salary DESC) AS dense_rank
FROM employees
ORDER BY salary DESC;

-- 3. LAG() and LEAD() - Access previous and next row values
--    LAG: Gets value from previous row
--    LEAD: Gets value from next row
--    Useful for: Comparing current value with previous/next values
SELECT 
    employee_id,
    first_name,
    last_name,
    salary,
    -- LAG gets the salary of the previous employee (ordered by salary)
    LAG(salary, 1) OVER (ORDER BY salary) AS prev_salary,
    -- LEAD gets the salary of the next employee (ordered by salary)
    LEAD(salary, 1) OVER (ORDER BY salary) AS next_salary,
    -- Calculate difference from previous salary
    salary - LAG(salary, 1) OVER (ORDER BY salary) AS salary_difference
FROM employees
ORDER BY salary;

-- 4. FIRST_VALUE() and LAST_VALUE() - Get first/last values in window
--    FIRST_VALUE: Gets the first value in the window
--    LAST_VALUE: Gets the last value in the window (requires ROWS clause)
SELECT 
    employee_id,
    first_name,
    last_name,
    salary,
    department_id,
    -- Get the highest salary in each department
    FIRST_VALUE(salary) OVER (PARTITION BY department_id ORDER BY salary DESC) AS dept_max_salary,
    -- Get the lowest salary in each department (using ROWS clause for proper window)
    LAST_VALUE(salary) OVER (PARTITION BY department_id ORDER BY salary DESC 
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS dept_min_salary
FROM employees
ORDER BY department_id, salary DESC;

-- 5. CUMULATIVE FUNCTIONS - Running totals and moving averages
--    SUM() OVER: Calculates running totals
--    AVG() OVER: Calculates moving averages
SELECT 
    order_date,
    amount,
    -- Running total: Sum of all amounts from first row to current row
    SUM(amount) OVER (ORDER BY order_date ROWS UNBOUNDED PRECEDING) AS running_total,
    -- Moving average: Average of current row and 2 rows before/after
    AVG(amount) OVER (ORDER BY order_date ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS moving_avg_5_rows,
    -- Count of rows in current window (5 rows: 2 before + current + 2 after)
    COUNT(*) OVER (ORDER BY order_date ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS window_count
FROM orders
ORDER BY order_date;

-- 6. PERCENTILE FUNCTIONS - Statistical analysis
--    PERCENT_RANK: Relative rank as percentage (0 to 1)
--    CUME_DIST: Cumulative distribution (0 to 1)
--    NTILE: Divides rows into equal-sized buckets
SELECT 
    employee_id,
    first_name,
    last_name,
    salary,
    department_id,
    -- PERCENT_RANK: 0.0 for lowest salary, 1.0 for highest salary in department
    PERCENT_RANK() OVER (PARTITION BY department_id ORDER BY salary) AS percent_rank,
    -- CUME_DIST: Cumulative distribution function
    CUME_DIST() OVER (PARTITION BY department_id ORDER BY salary) AS cumulative_dist,
    -- NTILE: Divides employees into 4 equal groups (quartiles)
    NTILE(4) OVER (PARTITION BY department_id ORDER BY salary) AS quartile
FROM employees
ORDER BY department_id, salary;

-- 7. WINDOW FRAME CLAUSES - Control which rows are included in calculation
--    ROWS BETWEEN: Physical row-based window
--    RANGE BETWEEN: Value-based window
SELECT 
    employee_id,
    salary,
    -- ROWS: Include 1 row before and 1 row after current row
    AVG(salary) OVER (ORDER BY salary ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS avg_3_rows,
    -- RANGE: Include rows with salary within 1000 of current salary
    AVG(salary) OVER (ORDER BY salary RANGE BETWEEN 1000 PRECEDING AND 1000 FOLLOWING) AS avg_range_1000,
    -- UNBOUNDED: Include all rows from start to current
    SUM(salary) OVER (ORDER BY salary ROWS UNBOUNDED PRECEDING) AS cumulative_sum
FROM employees
ORDER BY salary;

Regular Expressions

Regular expression functions for pattern matching and text processing (Access: SELECT privilege required)

-- REGEXP_LIKE for pattern matching
-- Access: Users with SELECT privilege on table
SELECT 
    email,
    phone,
    address
FROM employees
WHERE REGEXP_LIKE(email, '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$')
AND REGEXP_LIKE(phone, '^\d{3}-\d{3}-\d{4}$');

-- REGEXP_SUBSTR for extraction
-- Access: Users with SELECT privilege on table
SELECT 
    full_name,
    REGEXP_SUBSTR(full_name, '^[A-Za-z]+') AS first_name,
    REGEXP_SUBSTR(full_name, '[A-Za-z]+$') AS last_name
FROM employees;

-- REGEXP_REPLACE for substitution
-- Access: Users with SELECT privilege on table
SELECT 
    phone,
    REGEXP_REPLACE(phone, '\D', '') AS digits_only,
    REGEXP_REPLACE(phone, '(\d{3})(\d{3})(\d{4})', '(\1) \2-\3') AS formatted_phone
FROM employees;

-- REGEXP_INSTR for position finding
-- Access: Users with SELECT privilege on table
SELECT 
    email,
    REGEXP_INSTR(email, '@') AS at_position,
    REGEXP_INSTR(email, '\.', 1, 1) AS first_dot_position
FROM employees;

-- Complex pattern matching
-- Access: Users with SELECT privilege on table
SELECT 
    product_code,
    product_name
FROM products
WHERE REGEXP_LIKE(product_code, '^[A-Z]{2}\d{4}$')
AND REGEXP_LIKE(product_name, '^(Laptop|Desktop|Tablet)');

XML and JSON Functions

XML and JSON processing functions (Access: SELECT privilege required)

-- XML functions
-- Access: Users with SELECT privilege on table
SELECT 
    XMLTYPE('<employee><id>1</id><name>John Doe</name><salary>50000</salary></employee>') AS xml_data,
    XMLTYPE('<employee><id>1</id><name>John Doe</name><salary>50000</salary></employee>').EXTRACT('//name/text()') AS name,
    XMLTYPE('<employee><id>1</id><name>John Doe</name><salary>50000</salary></employee>').EXTRACT('//salary/text()').getNumberVal() AS salary
FROM dual;

-- JSON functions (Oracle 12c+)
-- Access: Users with SELECT privilege on table
SELECT 
    JSON_OBJECT('id', 1, 'name', 'John Doe', 'salary', 50000) AS json_object,
    JSON_ARRAY('apple', 'banana', 'cherry') AS json_array,
    JSON_QUERY('{"id": 1, "name": "John Doe", "salary": 50000}', '$.name') AS name_value,
    JSON_VALUE('{"id": 1, "name": "John Doe", "salary": 50000}', '$.salary') AS salary_value
FROM dual;

-- JSON table function
-- Access: Users with SELECT privilege on table
SELECT 
    j.id,
    j.name,
    j.salary
FROM JSON_TABLE(
    '[{"id": 1, "name": "John Doe", "salary": 50000}, {"id": 2, "name": "Jane Smith", "salary": 55000}]',
    '$[*]' COLUMNS (
        id NUMBER PATH '$.id',
        name VARCHAR2(100) PATH '$.name',
        salary NUMBER PATH '$.salary'
    )
) j;

-- XML table function
-- Access: Users with SELECT privilege on table
SELECT 
    x.id,
    x.name,
    x.salary
FROM XMLTABLE(
    '/employees/employee' PASSING XMLTYPE('<employees><employee><id>1</id><name>John Doe</name><salary>50000</salary></employee></employees>')
    COLUMNS (
        id NUMBER PATH 'id',
        name VARCHAR2(100) PATH 'name',
        salary NUMBER PATH 'salary'
    )
) x;