Professional Documents
Culture Documents
-- Query Notes: Buổi 7 (Session 7)
--- 1. Tính tổng: SUM, MIN, MAX, AVG, COUNT --> GROUP BY
-- PART 1: Ôn tập
-- Task 2.1: Calculate the number of successful transactions of each month in 2019 and
-- 2020 (number_success_trans) using GROUP BY. Then add a column with the total
-- number of successful transactions of each year (total_trans_year) using a window
-- function. Finally, calculate the successful transaction rate (success_rate) of each
-- month, i.e. the month's share of its year's total (pct).
-- b1: Gộp data 2 năm lại
-- b2: GROUP BY theo tháng
-- b3: Tìm total cả năm
-- b4: Tính tỷ lệ
-- Monthly number of successful transactions (status_id = 1) across the 2019
-- and 2020 fact tables, the yearly total of those counts, and each month's
-- share of its year's total.
WITH table_month AS (
    -- One row per (year, month) with the count of successful transactions.
    SELECT
        YEAR(transaction_time) AS [year],
        MONTH(transaction_time) AS [month],
        COUNT(transaction_id) AS number_trans
    FROM (
        -- NOTE(review): UNION removes fully identical rows. It is kept so the
        -- counts stay exactly as before; if transaction_id is unique across
        -- both tables, UNION ALL would skip the de-duplication - confirm.
        SELECT * FROM fact_transaction_2019
        WHERE status_id = 1
        UNION
        SELECT * FROM fact_transaction_2020
        WHERE status_id = 1
    ) AS unioned_table
    GROUP BY YEAR(transaction_time), MONTH(transaction_time)
)
SELECT
    [year],
    [month],
    number_trans,
    SUM(number_trans) OVER (PARTITION BY [year]) AS total_trans_year,
    -- Cast to FLOAT first so the division is not integer division, then
    -- round the fraction to two decimals.
    CAST(
        CAST(number_trans AS FLOAT) / SUM(number_trans) OVER (PARTITION BY [year])
        AS DECIMAL(10, 2)
    ) AS pct
FROM table_month;
-- The terminating semicolon is required: the next statement starts with WITH.
-- Months of each year with the most non-successful transactions
-- (status_id other than 1), limited to ranks 1-3 within the year.
WITH table_month AS (
    -- One row per (year, month) with the count of non-successful transactions.
    SELECT
        YEAR(transaction_time) AS [year],
        MONTH(transaction_time) AS [month],
        COUNT(transaction_id) AS number_failed_trans
    FROM (
        -- status_id is compared with the integer 1, as in the other queries of
        -- this file, instead of the string literal '1'.
        -- NOTE(review): UNION removes fully identical rows; kept to preserve counts.
        SELECT * FROM fact_transaction_2019
        WHERE status_id <> 1
        UNION
        SELECT * FROM fact_transaction_2020
        WHERE status_id <> 1
    ) AS unioned_table
    GROUP BY YEAR(transaction_time), MONTH(transaction_time)
),
table_rank AS (
    SELECT
        [year],
        [month],
        number_failed_trans,
        -- RANK gives tied months the same rank, so a year can return more
        -- than three rows when counts tie.
        RANK() OVER (PARTITION BY [year] ORDER BY number_failed_trans DESC) AS [rank]
    FROM table_month
)
SELECT
    [year],
    [month],
    number_failed_trans,
    [rank]
FROM table_rank
WHERE [rank] < 4;
-- The terminating semicolon is required: the next statement starts with WITH.
-- 2.3 Average gap, in days, between consecutive successful 'Telco' payments
-- of each customer, using the rows of fact_transaction_2019.
WITH table_previous_time AS (
    -- Pair every transaction with the same customer's previous transaction
    -- time; a customer's first transaction gets previous_time = NULL.
    SELECT
        fact19.customer_id,
        fact19.transaction_id,
        fact19.transaction_time,
        LAG(fact19.transaction_time, 1) OVER (
            PARTITION BY fact19.customer_id
            ORDER BY fact19.transaction_time ASC
        ) AS previous_time
    FROM fact_transaction_2019 AS fact19
    -- NOTE(review): assumes category is a dim_scenario column, in which case
    -- the WHERE filter already discarded unmatched rows of the former
    -- LEFT JOIN and INNER JOIN returns the same rows - confirm.
    INNER JOIN dim_scenario AS scen
        ON scen.scenario_id = fact19.scenario_id
    WHERE fact19.status_id = 1
        AND category = 'Telco'
),
table_gap_days AS (
    SELECT
        customer_id,
        transaction_id,
        transaction_time,
        previous_time,
        DATEDIFF(day, previous_time, transaction_time) AS gap_days
    FROM table_previous_time
)
SELECT
    customer_id,
    -- gap_days is an integer, and AVG over integers returns a truncated
    -- integer; cast first so the fractional part of the mean is kept.
    -- NULL gaps (each customer's first transaction) are ignored by AVG.
    AVG(CAST(gap_days AS FLOAT)) AS avg_days
FROM table_gap_days
GROUP BY customer_id;
-- The terminating semicolon is required: the next statement starts with WITH.
-- Year-over-year growth of the number of distinct customers with a
-- successful 'Telco' transaction, for each month of 2020 versus the same
-- month of 2019.
WITH table_month AS (
    -- One row per (year, month) with the number of distinct customers.
    SELECT
        YEAR(unioned_table.transaction_time) AS [year],
        MONTH(unioned_table.transaction_time) AS [month],
        COUNT(DISTINCT unioned_table.customer_id) AS number_customers
    FROM (
        -- NOTE(review): UNION removes fully identical rows; kept as in the
        -- original (the DISTINCT count is unaffected either way).
        SELECT * FROM fact_transaction_2019
        WHERE status_id = 1
        UNION
        SELECT * FROM fact_transaction_2020
        WHERE status_id = 1
    ) AS unioned_table
    INNER JOIN dim_scenario AS scena
        ON unioned_table.scenario_id = scena.scenario_id
    WHERE category = 'Telco'
    GROUP BY YEAR(unioned_table.transaction_time), MONTH(unioned_table.transaction_time)
),
table_last_year AS (
    SELECT
        [year],
        [month],
        number_customers,
        -- Look one year back within the same calendar month. A fixed offset
        -- of 12 over a single (year, month) ordering would compare the wrong
        -- months whenever a month has no rows; partitioning by month yields
        -- the same values when every month is present.
        LAG(number_customers, 1) OVER (
            PARTITION BY [month]
            ORDER BY [year]
        ) AS number_cus_last_year
    FROM table_month
)
SELECT
    [year],
    [month],
    number_customers,
    number_cus_last_year,
    -- Relative change formatted as a percentage string; NULL when the same
    -- month of the previous year has no row.
    FORMAT(
        CAST((number_customers - number_cus_last_year) AS FLOAT) / number_cus_last_year,
        'p'
    ) AS [%_growth]
FROM table_last_year
WHERE [year] = 2020;
--Part2
--1.1
--1.2.A
-- Number of successful (status_id = 1) 'billing' transactions per year,
-- month and sub_category across the 2019 and 2020 fact tables.
select
    year(transaction_time) as [year],
    month(transaction_time) as [month],
    sub_category,
    count(transaction_id) as number_trans
from (
    -- NOTE(review): union removes fully identical rows; kept to preserve counts.
    select * from fact_transaction_2019
    union
    select * from fact_transaction_2020
) as trans_19_20
-- NOTE(review): assumes category is a dim_scenario column, in which case the
-- where filter already discarded unmatched rows of the former left join and
-- inner join returns the same rows - confirm.
inner join dim_scenario as scen
    on trans_19_20.scenario_id = scen.scenario_id
where category = 'billing'
    and status_id = 1
group by year(transaction_time), month(transaction_time), sub_category
order by [year], [month], sub_category;
--1.2.B
-- Same monthly counts of successful 'billing' transactions, pivoted so that
-- each sub_category (electricity, internet, water) becomes its own column.
-- A month without rows for a sub_category shows NULL in that column.
select
    [year],
    [month],
    [electricity] as electricity_trans,
    [internet] as internet_trans,
    [water] as water_trans
from (
    -- Source rows for the pivot: one row per (year, month, sub_category).
    select
        year(transaction_time) as [year],
        month(transaction_time) as [month],
        sub_category,
        count(transaction_id) as number_trans
    from (
        -- NOTE(review): union removes fully identical rows; kept to preserve counts.
        select * from fact_transaction_2019
        union
        select * from fact_transaction_2020
    ) as trans_19_20
    -- NOTE(review): assumes category is a dim_scenario column, in which case
    -- the where filter already discarded unmatched rows of the former
    -- left join and inner join returns the same rows - confirm.
    inner join dim_scenario as scen
        on trans_19_20.scenario_id = scen.scenario_id
    where category = 'billing'
        and status_id = 1
    group by year(transaction_time), month(transaction_time), sub_category
) as source_table
pivot (
    -- Each (year, month, sub_category) has a single source row, so sum just
    -- carries number_trans into the matching column.
    sum(number_trans) for sub_category in ([electricity], [internet], [water])
) as pivot_table
order by [year], [month];
--- cách2 : GROUP BY với CASE WHEN
-- 2.1 Tính số lượng giao dịch của hóa đơn tiền điện theo từng tuần trong vòng 2 năm
-- 2.2 Tính số lượng giao dịch trung bình của hóa đơn tiền điện của 3 tuần gần nhất
-- Reference: https://learn.microsoft.com/en-us/sql/t-sql/queries/select-over-clause-transact-sql?view=sql-server-ver16