Different Types of Script to create in etl pipeline

/* 
 * Problem Statement:
 * Create incremental , backfill , validation & cleanup script for tgt_sales table
 */

/* Source OLTP Table */
DROP  TABLE IF EXISTS  oltp_sales;
CREATE TABLE oltp_sales (
    sales_id INT PRIMARY KEY,
    store_id INT,
    product_name VARCHAR(50),
    quantity_sold INT,
    sales_amount DECIMAL(10, 2),
    sales_time TIMESTAMP
);

INSERT INTO oltp_sales VALUES
(101, 101, 'iPhone 12', 10, 10000.00, '2024-01-20 09:15:00'),
(102, 102, 'MacBook Air', 5, 5000.00, '2024-01-20 10:30:00'),
(103, 103, 'iPad Pro', 8, 8000.00, '2024-01-20 11:45:00'),
(104, 201, 'iWatch Series 6', 12, 6000.00, '2024-01-20 13:00:00'),
(105, 202, 'iPod Touch', 15, 1500.00, '2024-01-20 14:15:00'),
(106, 101, 'iPhone SE', 7, 7000.00, '2024-01-20 15:30:00'),
(107, 102, 'MacBook Pro', 3, 3000.00, '2024-01-20 16:45:00'),
(108, 103, 'iPad Mini', 6, 6000.00, '2024-01-20 18:00:00'),
(109, 201, 'AirPods Pro', 20, 2000.00, '2024-01-20 19:15:00'),
(110, 202, 'iMac', 4, 4000.00, '2024-01-20 20:30:00'),
(111, 101, 'iPhone 13', 9, 9000.00, '2024-01-21 09:15:00'),
(112, 102, 'Mac Mini', 2, 2000.00, '2024-01-21 10:30:00'),
(113, 103, 'iPad Air', 10, 10000.00, '2024-01-21 11:45:00'),
(114, 201, 'Apple Watch SE', 15, 1500.00, '2024-01-21 13:00:00'),
(115, 202, 'iPod Nano', 8, 800.00, '2024-01-21 14:15:00'),
(116, 101, 'iPhone XR', 5, 5000.00, '2024-01-21 15:30:00'),
(117, 102, 'Mac Pro', 1, 10000.00, '2024-01-21 16:45:00'),
(118, 103, 'iPad 9th Gen', 12, 12000.00, '2024-01-21 18:00:00'),
(119, 201, 'AirPods Max', 3, 3000.00, '2024-01-21 19:15:00'),
(120, 202, 'Mac Studio', 6, 6000.00, '2024-01-21 20:30:00'),
(121, 101, 'iPhone SE 2nd Gen', 8, 8000.00, '2024-01-22 09:15:00'),
(122, 102, 'MacBook 12-inch', 4, 4000.00, '2024-01-22 10:30:00'),
(123, 103, 'iPad 8th Gen', 15, 15000.00, '2024-01-22 11:45:00'),
(124, 201, 'Apple Watch Series 3', 7, 700.00, '2024-01-22 13:00:00'),
(125, 202, 'iPod Shuffle', 20, 200.00, '2024-01-22 14:15:00'),
(126, 101, 'iPhone 11', 10, 10000.00, '2024-01-22 15:30:00'),
(127, 102, 'MacBook Pro 16-inch', 5, 5000.00, '2024-01-22 16:45:00'),
(128, 103, 'iPad Pro 11-inch', 8, 8000.00, '2024-01-22 18:00:00'),
(129, 201, 'AirPods 3rd Gen', 12, 1200.00, '2024-01-22 19:15:00'),
(130, 202, 'iMac Pro', 2, 20000.00, '2024-01-22 20:30:00'),
(131, 101, 'iPhone XS', 5, 5000.00, '2024-01-23 09:15:00'),
(132, 102, 'MacBook Air', 3, 3000.00, '2024-01-23 10:30:00'),
(133, 103, 'iPad Pro', 10, 10000.00, '2024-01-23 11:45:00'),
(134, 201, 'iWatch Series 6', 8, 8000.00, '2024-01-23 13:00:00'),
(135, 202, 'iPod Touch', 15, 1500.00, '2024-01-23 14:15:00'),
(136, 101, 'iPhone SE', 6, 6000.00, '2024-01-23 15:30:00'),
(137, 102, 'MacBook Pro', 4, 4000.00, '2024-01-23 16:45:00'),
(138, 103, 'iPad Mini', 12, 12000.00, '2024-01-23 18:00:00'),
(139, 201, 'AirPods Pro', 20, 2000.00, '2024-01-23 19:15:00'),
(140, 202, 'iMac', 7, 7000.00, '2024-01-23 20:30:00'),
(141, 101, 'iPhone 13', 9, 9000.00, '2024-01-23 09:15:00'),
(142, 102, 'Mac Mini', 2, 2000.00, '2024-01-24 10:30:00'),
(143, 103, 'iPad Air', 10, 10000.00, '2024-01-24 11:45:00'),
(144, 201, 'Apple Watch SE', 15, 1500.00, '2024-01-24 13:00:00'),
(145, 202, 'iPod Nano', 8, 800.00, '2024-01-24 14:15:00'),
(146, 101, 'iPhone XR', 5, 5000.00, '2024-01-24 15:30:00'),
(147, 102, 'Mac Pro', 1, 10000.00, '2024-01-24 16:45:00'),
(148, 103, 'iPad 9th Gen', 12, 12000.00, '2024-01-24 18:00:00'),
(149, 201, 'AirPods Max', 3, 3000.00, '2024-01-24 19:15:00'),
(150, 202, 'Mac Studio', 6, 6000.00, '2024-01-24 20:30:00'),
(151, 101, 'iPhone 13', 9, 9000.00, '2024-02-23 09:15:00'),
(152, 102, 'Mac Mini', 2, 2000.00, '2024-02-24 10:30:00'),
(153, 103, 'iPad Air', 10, 10000.00, '2024-02-24 11:45:00'),
(154, 201, 'Apple Watch SE', 15, 1500.00, '2024-02-24 13:00:00'),
(155, 202, 'iPod Nano', 8, 800.00, '2024-02-24 14:15:00'),
(156, 101, 'iPhone XR', 5, 5000.00, '2024-02-24 15:30:00'),
(157, 102, 'Mac Pro', 1, 10000.00, '2024-02-24 16:45:00'),
(158, 103, 'iPad 9th Gen', 12, 12000.00, '2024-02-24 18:00:00'),
(159, 201, 'AirPods Max', 3, 3000.00, '2024-02-24 19:15:00'),
(160, 202, 'Mac Studio', 6, 6000.00, '2024-02-24 20:30:00');



/* Target Stage Table */
DROP  TABLE IF EXISTS  stg_sales;
CREATE TABLE stg_sales (
    sales_id INT PRIMARY KEY,
    store_id INT,
    product_name VARCHAR(50),
    quantity_sold INT,
    sales_amount DECIMAL(10, 2),
    sales_time TIMESTAMP
);


/* Target EDW Table */
DROP  TABLE IF EXISTS  edw_sales;
CREATE TABLE edw_sales (
    sales_id INT PRIMARY KEY,
    store_id INT,
    product_name VARCHAR(50),
    quantity_sold INT,
    sales_amount DECIMAL(10, 2),
    sales_time TIMESTAMP
);

INSERT INTO  edw_sales values
(1, 101, 'iPhone 12', 10, 10000.00, '2016-01-20 09:15:00'),
(2, 102, 'MacBook Air', 5, 5000.00, '2017-01-20 10:30:00'),
(3, 103, 'iPad Pro', 8, 8000.00, '2017-01-20 11:45:00'),
(4, 201, 'iWatch Series 6', 12, 6000.00, '2017-01-20 13:00:00'),
(5, 202, 'iPod Touch', 15, 1500.00, '2017-01-20 14:15:00'),
(6, 101, 'iPhone SE', 7, 7000.00, '2017-01-20 15:30:00'),
(7, 102, 'MacBook Pro', 3, 3000.00, '2017-01-20 16:45:00');

INSERT INTO  edw_sales
SELECT * FROM oltp_sales WHERE   cast(sales_time AS date) < CAST('2024-02-25' AS date) - INTERVAL '1 day';

DELETE FROM edw_sales WHERE sales_id IN (105,106,112);

-- preview the data
SELECT * FROM oltp_sales;
SELECT * FROM stg_sales;
SELECT * FROM edw_sales ;

-- Setup complete. Now create the scripts.


-- oltp
select * from oltp_sales;

--stg
select * from stg_sales;

--edw
select * from edw_sales;

/* incremental script */
-- daily job (date : 25-Feb-2024)
-- 24th Feb 2024

delete from stg_sales;
insert into stg_sales
select * from oltp_sales
where cast(sales_time as date) = cast('2024-02-25' as date) - interval '1 day';

delete from edw_sales
using stg_sales
where stg_sales.sales_id = edw_sales.sales_id
and stg_sales.sales_time > edw_sales.sales_time;

insert into edw_sales
select t1.* from stg_sales t1
left join edw_sales t2
on
t1.sales_id = t2.sales_id
and t2.sales_id is null;

/* validation script */

with cte_oltp_val as (
    select store_id,cast(sales_time as date) as sales_date,
    sum(sales_amount) as total_sales
    from oltp_sales WHERE cast(sales_time as date)
    between '2024-01-01' and '2024-01-31'
    group by 1,2
),
cte_edw_val as (
    select store_id,cast(sales_time as date) as sales_date,
    sum(sales_amount) as total_sales
    from edw_sales WHERE cast(sales_time as date)
    between '2024-01-01' and '2024-01-31'
    group by 1,2
)
select t1.store_id, t1.sales_date,
t1.total_sales as oltp_sales,
t2.total_sales as edw_sales,
case when t1.total_sales <> t2.total_sales 
then 'NOT MATCH'
else 'MATCH' end as val_result
from
cte_oltp_val t1
left outer join 
cte_edw_val t2
on
t1.store_id = t2.store_id
and t1.sales_date = t2.sales_date;

/* backfill script */

delete from stg_sales;

insert into stg_sales
select * from oltp_sales
where cast(sales_time as date) between cast('2024-01-01' as date) 
and cast('2024-01-31' as date);

delete from edw_sales 
where cast(sales_time as date) between cast('2024-01-01' as date) 
and cast('2024-01-31' as date);

insert into edw_sales 
select * from stg_sales;

/* cleanup script */



delete from edw_sales 
where cast(sales_time as date) < cast ('2024-02-25' as date) - interval '5 year'





Leave a Comment

Your email address will not be published. Required fields are marked *