-- Page 1 of 10

-- WINDOW FUNCTIONS --

--- 1. Tính tổng: SUM, MIN, MAX, AVG, COUNT --> GROUP BY

--> Khi nào GROUP BY và khi nào nên WINDOW FUNCTIONS ?

-- 2. Xếp hạng : RANK (), ROW_NUMBER(), DENSE_RANK (), NTILE ()

-- 3. Cộng dồn: SUM () OVER ( ORDER BY )

-- 4. Xử lý nâng cao: LAG (), LEAD (), FIRST_VALUE (), PERCENT_RANK ()

---> Ôn tập + Phân tích data time series

-- PART 1: Ôn tập

-- task 1: Calculate the number of successful transactions of each month in 2019


--Your code here
-- Task 1: number of successful transactions for each calendar month of 2019.
-- Output columns: month_info (month number), outcome (transaction count).
-- status_id = 1 marks a successful transaction; the integer literal matches the
-- other queries in this file (the original compared against the string '1').
-- The original LEFT JOIN to dim_status only fetched status_description, which never
-- reached the result set, so the join and the wrapping CTE are dropped.
-- The statement is terminated with ';' so that a following WITH (CTE) parses in the same batch.
SELECT
    MONTH(fact19.transaction_time) AS month_info,
    COUNT(fact19.transaction_id) AS outcome
FROM fact_transaction_2019 AS fact19
WHERE fact19.status_id = 1
GROUP BY MONTH(fact19.transaction_time)
ORDER BY month_info;

-- Task 2.1: Calculate the number of successful transactions of each month in 2019 and
-- 2020 (number_success_trans) (using GROUP BY). Then create a column of the total
-- number of successful transactions of each year (total_trans_year) (using a window
-- function). Finally, calculate the successful transaction rate (success_rate) of each
-- month (pct).
-- b1: Gộp data 2 năm lại
-- b2: GROUP BY theo tháng
-- b3: Tìm total cả năm
-- b4: Tính tỷ lệ

-- Task 2.1: successful transactions per month of 2019 and 2020, the total per year,
-- and each month's share of its year's total.
-- Output columns: [year], [month], number_trans, total_trans_year, pct.
WITH unioned_table AS (
    -- Step 1: stack both yearly fact tables, successful rows only (status_id = 1).
    -- UNION ALL skips the sort/de-duplication that UNION performs, and only the two
    -- columns the aggregation needs are carried forward instead of SELECT *.
    -- NOTE(review): assumes neither table contains fully duplicated rows, which the
    -- original UNION would have collapsed - confirm transaction_id is unique.
    SELECT transaction_id, transaction_time
    FROM fact_transaction_2019
    WHERE status_id = 1
    UNION ALL
    SELECT transaction_id, transaction_time
    FROM fact_transaction_2020
    WHERE status_id = 1
)
, table_month AS (
    -- Step 2: count transactions per (year, month).
    SELECT
        YEAR(transaction_time) AS [year],
        MONTH(transaction_time) AS [month],
        COUNT(transaction_id) AS number_trans
    FROM unioned_table
    GROUP BY YEAR(transaction_time), MONTH(transaction_time)
)
-- Step 3 and 4: the window SUM repeats the yearly total on every month row; the
-- FLOAT cast avoids integer division before rounding the ratio to two decimals.
SELECT
    [year],
    [month],
    number_trans,
    SUM(number_trans) OVER (PARTITION BY [year]) AS total_trans_year,
    CAST(
        CAST(number_trans AS FLOAT) / SUM(number_trans) OVER (PARTITION BY [year])
        AS DECIMAL(10, 2)
    ) AS pct
FROM table_month;

-- 2.2 Tìm 3 tháng có nhiều gd thất bại nhất mỗi năm

-- Task 2.2: the three months with the most failed transactions in each year.
-- Output columns: [year], [month], number_failed_trans, [rank].
WITH unioned_table AS (
    -- Failed rows are those whose status_id is not 1. The integer literal matches the
    -- other queries in this file (the original compared against the string '1').
    -- Rows with a NULL status_id are excluded by this comparison, as in the original.
    -- UNION ALL over a narrow column list replaces UNION over SELECT *.
    -- NOTE(review): assumes no fully duplicated rows exist in either table - confirm.
    SELECT transaction_id, transaction_time
    FROM fact_transaction_2019
    WHERE status_id <> 1
    UNION ALL
    SELECT transaction_id, transaction_time
    FROM fact_transaction_2020
    WHERE status_id <> 1
)
, table_month AS (
    SELECT
        YEAR(transaction_time) AS [year],
        MONTH(transaction_time) AS [month],
        COUNT(transaction_id) AS number_failed_trans
    FROM unioned_table
    GROUP BY YEAR(transaction_time), MONTH(transaction_time)
)
, table_rank AS (
    -- RANK gives tied months the same position, so a year can return more than
    -- three rows when counts tie at the cut-off.
    SELECT
        [year],
        [month],
        number_failed_trans,
        RANK() OVER (PARTITION BY [year] ORDER BY number_failed_trans DESC) AS [rank]
    FROM table_month
)
SELECT
    [year],
    [month],
    number_failed_trans,
    [rank]
FROM table_rank
WHERE [rank] <= 3
ORDER BY [year], [rank];

-- 2.3 Tính khoảng cách giữa các lần thanh toán của KH telecom

-- Task 2.3: average number of days between consecutive successful Telco payments
-- of each customer in 2019.
-- Output columns: customer_id, avg_days.
WITH table_previous_time AS (
    -- For every transaction, fetch the same customer's previous transaction time.
    -- The category filter discards unmatched rows, so the original LEFT JOIN already
    -- behaved as an inner join; INNER JOIN states that directly.
    SELECT
        fact19.customer_id,
        fact19.transaction_id,
        fact19.transaction_time,
        LAG(fact19.transaction_time, 1) OVER (
            PARTITION BY fact19.customer_id
            ORDER BY fact19.transaction_time ASC
        ) AS previous_time
    FROM fact_transaction_2019 AS fact19
    INNER JOIN dim_scenario AS scen
        ON scen.scenario_id = fact19.scenario_id
    WHERE fact19.status_id = 1
      AND scen.category = 'Telco'
)
, table_gap_days AS (
    -- DATEDIFF(day, ...) counts the date boundaries crossed between the two times.
    -- A customer's first transaction has no previous_time, so its gap is NULL.
    SELECT
        customer_id,
        transaction_id,
        transaction_time,
        previous_time,
        DATEDIFF(day, previous_time, transaction_time) AS gap_days
    FROM table_previous_time
)
-- gap_days is an int and AVG over an int returns an int (fraction truncated), so the
-- value is cast to FLOAT first. NULL gaps are ignored by AVG; a customer with a
-- single transaction gets a NULL average.
-- The statement is terminated with ';' so that a following WITH (CTE) parses in the same batch.
SELECT
    customer_id,
    AVG(CAST(gap_days AS FLOAT)) AS avg_days
FROM table_gap_days
GROUP BY customer_id;
-- Year-over-year growth of the number of distinct customers with a successful Telco
-- transaction, for each month of 2020 compared with the same month of 2019.
-- Output columns: [year], [month], number_customers, number_cus_last_year, [%_growth].
WITH unioned_table AS (
    -- COUNT(DISTINCT customer_id) below is unaffected by duplicate rows, so UNION ALL
    -- over the three needed columns returns the same result as UNION over SELECT *.
    SELECT customer_id, transaction_time, scenario_id
    FROM fact_transaction_2019
    WHERE status_id = 1
    UNION ALL
    SELECT customer_id, transaction_time, scenario_id
    FROM fact_transaction_2020
    WHERE status_id = 1
)
, table_month AS (
    SELECT
        YEAR(unioned_table.transaction_time) AS [year],
        MONTH(unioned_table.transaction_time) AS [month],
        COUNT(DISTINCT unioned_table.customer_id) AS number_customers
    FROM unioned_table
    INNER JOIN dim_scenario AS scena
        ON unioned_table.scenario_id = scena.scenario_id
    WHERE scena.category = 'Telco'
    GROUP BY YEAR(unioned_table.transaction_time), MONTH(unioned_table.transaction_time)
)
-- The original used LAG(number_customers, 12) over all months, which only lines up
-- with "same month last year" when every one of the 24 months has a row. Matching on
-- (year - 1, month) is correct even when a month is missing; the growth is then NULL.
-- FORMAT(..., 'p') renders the ratio as a percentage string.
SELECT
    cur.[year],
    cur.[month],
    cur.number_customers,
    prev.number_customers AS number_cus_last_year,
    FORMAT(
        CAST(cur.number_customers - prev.number_customers AS FLOAT) / prev.number_customers,
        'p'
    ) AS [%_growth]
FROM table_month AS cur
LEFT JOIN table_month AS prev
    ON prev.[year] = cur.[year] - 1
   AND prev.[month] = cur.[month]
WHERE cur.[year] = 2020
ORDER BY cur.[month];
--Part2
--1.1

-- Part 2, 1.1: number of successful billing transactions per month of 2019 and 2020.
-- Output columns: [year], [month], time_calendar (yyyyMM), number_trans.
-- The original emitted two columns that were both named time_calendar; only the
-- FORMAT(..., 'yyyyMM') version is kept. The dropped alternative built the same text as
-- CONCAT(YEAR(transaction_time), FORMAT(MONTH(transaction_time), 'd2')).
SELECT
    YEAR(trans_19_20.transaction_time) AS [year],
    MONTH(trans_19_20.transaction_time) AS [month],
    FORMAT(trans_19_20.transaction_time, 'yyyyMM') AS time_calendar,
    COUNT(trans_19_20.transaction_id) AS number_trans
FROM (
    -- UNION ALL over the needed columns replaces UNION over SELECT *.
    -- NOTE(review): assumes no fully duplicated rows exist in either table - confirm.
    SELECT transaction_id, transaction_time, scenario_id, status_id
    FROM fact_transaction_2019
    UNION ALL
    SELECT transaction_id, transaction_time, scenario_id, status_id
    FROM fact_transaction_2020
) AS trans_19_20
-- The category filter removes unmatched rows, so this is an inner join in effect.
INNER JOIN dim_scenario AS scen
    ON trans_19_20.scenario_id = scen.scenario_id
WHERE scen.category = 'billing'
  AND trans_19_20.status_id = 1
GROUP BY
    YEAR(trans_19_20.transaction_time),
    MONTH(trans_19_20.transaction_time),
    FORMAT(trans_19_20.transaction_time, 'yyyyMM')
ORDER BY [year], [month];

--1.2.A
-- Part 2, 1.2.A: successful billing transactions per month and sub_category
-- (long format: one row per year, month and sub_category).
-- Output columns: [year], [month], sub_category, number_trans.
SELECT
    YEAR(trans_19_20.transaction_time) AS [year],
    MONTH(trans_19_20.transaction_time) AS [month],
    scen.sub_category,
    COUNT(trans_19_20.transaction_id) AS number_trans
FROM (
    -- UNION ALL over the needed columns replaces UNION over SELECT *.
    -- NOTE(review): assumes no fully duplicated rows exist in either table - confirm.
    SELECT transaction_id, transaction_time, scenario_id, status_id
    FROM fact_transaction_2019
    UNION ALL
    SELECT transaction_id, transaction_time, scenario_id, status_id
    FROM fact_transaction_2020
) AS trans_19_20
-- The category filter removes unmatched rows, so this is an inner join in effect.
INNER JOIN dim_scenario AS scen
    ON trans_19_20.scenario_id = scen.scenario_id
WHERE scen.category = 'billing'
  AND trans_19_20.status_id = 1
GROUP BY
    YEAR(trans_19_20.transaction_time),
    MONTH(trans_19_20.transaction_time),
    scen.sub_category
ORDER BY [year], [month], scen.sub_category ASC;

--1.2.B

-- cách 1 : Phương pháp PIVOT

-- Part 2, 1.2.B, approach 1 (PIVOT): successful billing transactions per month with
-- one column per sub_category (electricity, internet, water).
-- Output columns: [year], [month], electricity_trans, internet_trans, water_trans.
-- PIVOT returns NULL for a sub_category with no rows in a month; COALESCE turns that
-- into 0 so the result agrees with the COUNT(CASE WHEN ...) variants of this exercise.
SELECT
    [year],
    [month],
    COALESCE([electricity], 0) AS electricity_trans,
    COALESCE([internet], 0) AS internet_trans,
    COALESCE([water], 0) AS water_trans
FROM (
    -- Long-format counts: one row per year, month and sub_category.
    SELECT
        YEAR(trans_19_20.transaction_time) AS [year],
        MONTH(trans_19_20.transaction_time) AS [month],
        scen.sub_category,
        COUNT(trans_19_20.transaction_id) AS number_trans
    FROM (
        -- UNION ALL over the needed columns replaces UNION over SELECT *.
        -- NOTE(review): assumes no fully duplicated rows exist in either table - confirm.
        SELECT transaction_id, transaction_time, scenario_id, status_id
        FROM fact_transaction_2019
        UNION ALL
        SELECT transaction_id, transaction_time, scenario_id, status_id
        FROM fact_transaction_2020
    ) AS trans_19_20
    INNER JOIN dim_scenario AS scen
        ON trans_19_20.scenario_id = scen.scenario_id
    WHERE scen.category = 'billing'
      AND trans_19_20.status_id = 1
    GROUP BY
        YEAR(trans_19_20.transaction_time),
        MONTH(trans_19_20.transaction_time),
        scen.sub_category
) AS source_table
PIVOT (
    -- The columns of source_table not named here ([year], [month]) form the row groups.
    SUM(number_trans) FOR sub_category IN ([electricity], [internet], [water])
) AS pivot_table
ORDER BY [year], [month];
--- cách 2 : GROUP BY với CASE WHEN

-- Part 2, 1.2.B, approach 2 (GROUP BY with CASE WHEN): successful billing
-- transactions per month, one column per sub_category.
-- Output columns: [year], [month], internet_trans, elec_trans, water_trans.
-- Each CASE yields NULL for rows of another sub_category and COUNT skips NULLs, so
-- every column counts only its own sub_category (0 when there are none).
-- The statement is terminated with ';' so that a following WITH (CTE) parses in the same batch.
SELECT
    YEAR(trans_19_20.transaction_time) AS [year],
    MONTH(trans_19_20.transaction_time) AS [month],
    COUNT(CASE WHEN scen.sub_category = 'internet' THEN trans_19_20.transaction_id END)
        AS internet_trans,
    COUNT(CASE WHEN scen.sub_category = 'electricity' THEN trans_19_20.transaction_id END)
        AS elec_trans,
    COUNT(CASE WHEN scen.sub_category = 'water' THEN trans_19_20.transaction_id END)
        AS water_trans
FROM (
    -- UNION ALL over the needed columns replaces UNION over SELECT *.
    -- NOTE(review): assumes no fully duplicated rows exist in either table - confirm.
    SELECT transaction_id, transaction_time, scenario_id, status_id
    FROM fact_transaction_2019
    UNION ALL
    SELECT transaction_id, transaction_time, scenario_id, status_id
    FROM fact_transaction_2020
) AS trans_19_20
-- The category filter removes unmatched rows, so this is an inner join in effect.
INNER JOIN dim_scenario AS scen
    ON trans_19_20.scenario_id = scen.scenario_id
WHERE scen.category = 'billing'
  AND trans_19_20.status_id = 1
GROUP BY
    YEAR(trans_19_20.transaction_time),
    MONTH(trans_19_20.transaction_time)
ORDER BY [year], [month];

--- cách 3 : WINDOW FUNCTIONs với CASE WHEN


-- Part 2, 1.2.B, approach 3 (window functions with CASE WHEN): successful billing
-- transactions per month, one column per sub_category.
-- Output columns: [year], [month], inter_trans, water_trans, elec_trans.
WITH table_joined AS (
    -- One row per successful billing transaction, tagged with year, month, sub_category.
    SELECT
        YEAR(trans_19_20.transaction_time) AS [year],
        MONTH(trans_19_20.transaction_time) AS [month],
        scen.sub_category,
        trans_19_20.transaction_id
    FROM (
        -- UNION ALL over the needed columns replaces UNION over SELECT *.
        -- NOTE(review): assumes no fully duplicated rows exist in either table - confirm.
        SELECT transaction_id, transaction_time, scenario_id, status_id
        FROM fact_transaction_2019
        UNION ALL
        SELECT transaction_id, transaction_time, scenario_id, status_id
        FROM fact_transaction_2020
    ) AS trans_19_20
    INNER JOIN dim_scenario AS scen
        ON trans_19_20.scenario_id = scen.scenario_id
    WHERE scen.category = 'billing'
      AND trans_19_20.status_id = 1
)
-- The windowed counts repeat the monthly totals on every transaction row of that
-- month; DISTINCT then collapses them to a single row per (year, month).
SELECT DISTINCT
    [year],
    [month],
    COUNT(CASE WHEN sub_category = 'internet' THEN transaction_id END)
        OVER (PARTITION BY [year], [month]) AS inter_trans,
    COUNT(CASE WHEN sub_category = 'water' THEN transaction_id END)
        OVER (PARTITION BY [year], [month]) AS water_trans,
    COUNT(CASE WHEN sub_category = 'electricity' THEN transaction_id END)
        OVER (PARTITION BY [year], [month]) AS elec_trans
FROM table_joined
ORDER BY [year], [month];
--- 1.3 Tính theo % của từng sub_category

-- Part 2, 1.3: share of each sub_category (internet, electricity, water) among the
-- successful billing transactions of each month.
-- Output columns: [year], [month], internet_trans, elec_trans, water_trans,
-- total_trans, internet_pct, elec_pct, water_pct.
-- The original only produced the counts and the monthly total; the *_pct columns add
-- the percentage the heading asks for (0-100, two decimals). Multiplying by 100.0
-- forces decimal arithmetic and NULLIF guards the division against a zero total.
-- The statement is terminated with ';' so that a following WITH (CTE) parses in the same batch.
SELECT
    YEAR(trans_19_20.transaction_time) AS [year],
    MONTH(trans_19_20.transaction_time) AS [month],
    COUNT(CASE WHEN scen.sub_category = 'internet' THEN trans_19_20.transaction_id END)
        AS internet_trans,
    COUNT(CASE WHEN scen.sub_category = 'electricity' THEN trans_19_20.transaction_id END)
        AS elec_trans,
    COUNT(CASE WHEN scen.sub_category = 'water' THEN trans_19_20.transaction_id END)
        AS water_trans,
    COUNT(trans_19_20.transaction_id) AS total_trans,
    CAST(
        100.0 * COUNT(CASE WHEN scen.sub_category = 'internet' THEN trans_19_20.transaction_id END)
        / NULLIF(COUNT(trans_19_20.transaction_id), 0)
        AS DECIMAL(5, 2)
    ) AS internet_pct,
    CAST(
        100.0 * COUNT(CASE WHEN scen.sub_category = 'electricity' THEN trans_19_20.transaction_id END)
        / NULLIF(COUNT(trans_19_20.transaction_id), 0)
        AS DECIMAL(5, 2)
    ) AS elec_pct,
    CAST(
        100.0 * COUNT(CASE WHEN scen.sub_category = 'water' THEN trans_19_20.transaction_id END)
        / NULLIF(COUNT(trans_19_20.transaction_id), 0)
        AS DECIMAL(5, 2)
    ) AS water_pct
FROM (
    -- UNION ALL over the needed columns replaces UNION over SELECT *.
    -- NOTE(review): assumes no fully duplicated rows exist in either table - confirm.
    SELECT transaction_id, transaction_time, scenario_id, status_id
    FROM fact_transaction_2019
    UNION ALL
    SELECT transaction_id, transaction_time, scenario_id, status_id
    FROM fact_transaction_2020
) AS trans_19_20
-- The category filter removes unmatched rows, so this is an inner join in effect.
INNER JOIN dim_scenario AS scen
    ON trans_19_20.scenario_id = scen.scenario_id
WHERE scen.category = 'billing'
  AND trans_19_20.status_id = 1
  -- Restricting to the three sub_categories makes total_trans their combined count.
  AND scen.sub_category IN ('internet', 'electricity', 'water')
GROUP BY
    YEAR(trans_19_20.transaction_time),
    MONTH(trans_19_20.transaction_time)
ORDER BY [year], [month];

-- 2.1 Tính số lượng giao dịch của hóa đơn tiền điện theo từng tuần trong vòng 2 năm

-- Part 2, 2.1: number of successful electricity-bill payments per week over 2019-2020.
-- Output columns: [year], [WEEK], electricity_trans.
WITH tb_union AS (
    -- Successful transactions (status_id = 1) of both years.
    -- UNION ALL skips the sort/de-duplication that UNION performs.
    -- NOTE(review): assumes the selected column triple is never duplicated - confirm
    -- transaction_id is unique.
    SELECT transaction_id, transaction_time, scenario_id
    FROM fact_transaction_2019
    WHERE status_id = 1
    UNION ALL
    SELECT transaction_id, transaction_time, scenario_id
    FROM fact_transaction_2020
    WHERE status_id = 1
)
, tb_electricity AS (
    -- Keep only electricity bill payments. The filters on dim_scenario columns remove
    -- unmatched rows, so the original LEFT JOIN already behaved as an inner join.
    SELECT
        tb_union.transaction_id,
        tb_union.transaction_time,
        tb_union.scenario_id,
        sce.sub_category
    FROM tb_union
    INNER JOIN dim_scenario AS sce
        ON sce.scenario_id = tb_union.scenario_id
    WHERE sce.transaction_type = 'payment'
      AND sce.category = 'Billing'
      AND sce.sub_category = 'Electricity'
)
-- tb_electricity holds electricity rows only, so a plain COUNT replaces the original
-- COUNT(CASE WHEN sub_category = 'Electricity' ...), which repeated the same filter.
-- The statement is terminated with ';' so that a following WITH (CTE) parses in the same batch.
SELECT
    YEAR(transaction_time) AS [year],
    DATEPART(WEEK, transaction_time) AS [WEEK],
    COUNT(transaction_id) AS electricity_trans
FROM tb_electricity
GROUP BY YEAR(transaction_time), DATEPART(WEEK, transaction_time)
ORDER BY YEAR(transaction_time), DATEPART(WEEK, transaction_time);

-- 2.2 Tính số lượng giao dịch trung bình của hóa đơn tiền điện của 3 tuần gần nhất

-- Part 2, 2.2: weekly count of successful electricity-bill payments together with the
-- moving average over the current week and the two preceding weeks.
-- Output columns: [year], [WEEK], electricity_trans, avg_last_3_weeks.
WITH tb_union AS (
    -- Successful transactions (status_id = 1) of both years.
    -- UNION ALL skips the sort/de-duplication that UNION performs.
    -- NOTE(review): assumes the selected column triple is never duplicated - confirm
    -- transaction_id is unique.
    SELECT transaction_id, transaction_time, scenario_id
    FROM fact_transaction_2019
    WHERE status_id = 1
    UNION ALL
    SELECT transaction_id, transaction_time, scenario_id
    FROM fact_transaction_2020
    WHERE status_id = 1
)
, tb_electricity AS (
    -- Keep only electricity bill payments. The filters on dim_scenario columns remove
    -- unmatched rows, so the original LEFT JOIN already behaved as an inner join.
    SELECT
        tb_union.transaction_id,
        tb_union.transaction_time,
        tb_union.scenario_id,
        sce.sub_category
    FROM tb_union
    INNER JOIN dim_scenario AS sce
        ON sce.scenario_id = tb_union.scenario_id
    WHERE sce.transaction_type = 'payment'
      AND sce.category = 'Billing'
      AND sce.sub_category = 'Electricity'
)
, tb_week AS (
    -- One row per (year, week). tb_electricity holds electricity rows only, so a plain
    -- COUNT replaces the original COUNT(CASE WHEN sub_category = 'Electricity' ...).
    SELECT
        YEAR(transaction_time) AS [year],
        DATEPART(WEEK, transaction_time) AS [WEEK],
        COUNT(transaction_id) AS electricity_trans
    FROM tb_electricity
    GROUP BY YEAR(transaction_time), DATEPART(WEEK, transaction_time)
)
-- electricity_trans is an int and AVG over an int returns an int (fraction
-- truncated), so the value is cast to FLOAT before averaging.
-- ROWS BETWEEN 2 PRECEDING AND CURRENT ROW averages up to three rows; the first two
-- rows of the result average over fewer weeks because no earlier rows exist.
-- The window references [WEEK] with the same casing as its alias so the query also
-- resolves under a case-sensitive collation.
SELECT
    [year],
    [WEEK],
    electricity_trans,
    AVG(CAST(electricity_trans AS FLOAT)) OVER (
        ORDER BY [year], [WEEK]
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) AS avg_last_3_weeks
FROM tb_week;

---> ROWS: số dòng


---> BETWEEN ... AND ...
--> N PRECEDING: N dòng phía trước
--> CURRENT ROW: dòng hiện tại

--> N FOLLOWING: N dòng phía sau

-- https://learn.microsoft.com/en-us/sql/t-sql/queries/select-over-clause-transact-sql?view=sql-server-ver16

-- You might also like