Professional Documents
Culture Documents
Kumpulan Query PA 1 Kelompok 5 Batch 13
Kumpulan Query PA 1 Kelompok 5 Batch 13
A. Data Cleansing:
-- Six time zones in the USA.
-- Builds crash_clean from crash:
--   1. converts timestamp_of_crash with AT TIME ZONE, using one time-zone
--      abbreviation per state (a state is assigned to a single zone);
--   2. keeps only rows whose converted timestamp falls in 2021;
--   3. drops rows whose descriptive columns hold placeholder values
--      ('Not Reported', 'Unknown', ...).
-- NOTE(review): the abbreviations ('EST', 'CST', ...) are presumably fixed
-- standard-time offsets, i.e. daylight saving time is not applied — confirm.
CREATE TABLE crash_clean AS
WITH crash_with_converted_time AS (
    SELECT
        crash.*,
        -- No ELSE branch: a state_name missing from every list yields a NULL
        -- converted_timestamp and is removed by the year filter below.
        CASE
            -- Eastern: 22 names
            WHEN state_name IN (
                'Connecticut', 'Delaware', 'Georgia', 'Maine', 'Maryland',
                'Massachusetts', 'Michigan', 'New Hampshire', 'New Jersey',
                'New York', 'North Carolina', 'Ohio', 'Pennsylvania',
                'Rhode Island', 'South Carolina', 'Vermont', 'Virginia',
                'West Virginia', 'District of Columbia', 'Florida', 'Indiana',
                'Tennessee'
            )
                THEN timestamp_of_crash AT TIME ZONE 'EST'
            -- Central: 13 names ('Michigan' is listed under Eastern only)
            WHEN state_name IN (
                'Alabama', 'Arkansas', 'Illinois', 'Iowa', 'Louisiana',
                'Minnesota', 'Mississippi', 'Missouri',
                'Nebraska', 'Oklahoma', 'Wisconsin', 'Kentucky', 'South Dakota'
            )
                THEN timestamp_of_crash AT TIME ZONE 'CST'
            -- Mountain: 9 names
            WHEN state_name IN (
                'Arizona', 'Colorado', 'Montana', 'New Mexico',
                'North Dakota', 'Texas', 'Utah', 'Wyoming', 'Kansas'
            )
                THEN timestamp_of_crash AT TIME ZONE 'MST'
            -- Pacific: 5 names
            WHEN state_name IN (
                'California', 'Nevada', 'Washington', 'Idaho', 'Oregon'
            )
                THEN timestamp_of_crash AT TIME ZONE 'PST'
            -- Alaska: 'Alaska' belongs here (Alaska Time), not with Hawaii.
            WHEN state_name IN (
                'Alaska', 'Main Part of Alaska', 'Anchorage', 'Juneau', 'Nome'
            )
                THEN timestamp_of_crash AT TIME ZONE 'AKST'
            -- Hawaii: 1 name
            WHEN state_name IN ('Hawaii')
                THEN timestamp_of_crash AT TIME ZONE 'HST'
        END AS converted_timestamp
    FROM crash
),
timezone_in_usa AS (
    SELECT *
    FROM crash_with_converted_time
    WHERE EXTRACT(YEAR FROM converted_timestamp) = 2021
)
SELECT *
FROM timezone_in_usa
-- NOTE(review): 99998 / 99999 look like sentinel codes for an unknown
-- milepoint — confirm against the data dictionary.
WHERE milepoint NOT IN (99998, 99999)
  AND city_name NOT IN ('NOT APPLICABLE', 'Not Reported', 'Unknown')
  AND land_use_name NOT IN ('Not Reported', 'Unknown')
  AND functional_system_name NOT IN ('Unknown', 'Not Reported')
  AND manner_of_collision_name NOT IN ('Not Reported', 'Reported as Unknown')
  AND type_of_intersection_name NOT IN ('Unknown', 'Not Reported')
  AND light_condition_name NOT IN ('Not Reported', 'Reported as Unknown')
  AND atmospheric_conditions_1_name NOT IN ('Not Reported', 'Reported as Unknown');
***
1. Ada 4 Query
Query:
-- Five most frequent atmospheric_conditions_1_name values in crash,
-- skipping rows recorded as 'Not Reported'.
-- GROUP BY already yields one row per value, so no DISTINCT is needed.
SELECT
    atmospheric_conditions_1_name,
    COUNT(*)
FROM crash
WHERE atmospheric_conditions_1_name <> 'Not Reported'
GROUP BY atmospheric_conditions_1_name
ORDER BY COUNT(*) DESC
LIMIT 5
Query:
-- Five most frequent type_of_intersection_name values in crash,
-- skipping the 'Not Reported' and 'Reported as Unknown' placeholders.
-- GROUP BY already yields one row per value, so no DISTINCT is needed.
SELECT
    type_of_intersection_name,
    COUNT(*)
FROM crash
WHERE type_of_intersection_name NOT IN ('Not Reported', 'Reported as Unknown')
GROUP BY type_of_intersection_name
ORDER BY COUNT(*) DESC
LIMIT 5
Query:
-- Five most frequent light_condition_name values in crash (no rows filtered).
-- GROUP BY already yields one row per value, so no DISTINCT is needed.
SELECT
    light_condition_name,
    COUNT(*)
FROM crash
GROUP BY light_condition_name
ORDER BY COUNT(*) DESC
LIMIT 5
Tambahan di 1d.
-- Number of crash_clean rows that report at least one drunk driver.
-- The WHERE clause already excludes NULLs, so counting rows is equivalent
-- to counting the column.
SELECT COUNT(*) AS total_kecelakaan_per_drunk_drivers
FROM crash_clean
WHERE number_of_drunk_drivers > 0
***
Query:
-- Ten states with the most crash_clean rows, counted by non-NULL
-- consecutive_number.
SELECT
    state_name,
    COUNT(consecutive_number)
FROM crash_clean
GROUP BY state_name
ORDER BY COUNT(consecutive_number) DESC
LIMIT 10
***
Query:
Query:
-- QUESTION 4
-- PERCENTAGE OF CRASHES INVOLVING DRUNK DRIVERS --
-- Returns one row: the total row count, the number of rows with
-- number_of_drunk_drivers > 0, and that number as a percentage of the rows
-- where number_of_drunk_drivers is not NULL.
SELECT
    COUNT(*) AS total_kecelakaan,
    COUNT(CASE WHEN number_of_drunk_drivers > 0 THEN 1 END) AS drunk_driver,
    -- NULLIF makes the percentage NULL instead of raising a division-by-zero
    -- error when there are no rows with a non-NULL number_of_drunk_drivers.
    (
        COUNT(CASE WHEN number_of_drunk_drivers > 0 THEN 1 END) * 100.0
        / NULLIF(COUNT(number_of_drunk_drivers), 0)
    ) AS persentase_mabuk
FROM crash_clean
***
Query:
-- Crash count per land_use_name, with the overall total and each group's
-- percentage of that total. Rows labelled 'Trafficway Not in State Inventory'
-- are excluded before grouping, so they do not contribute to the total.
SELECT
    land_use_name,
    COUNT(*) AS jumlah_kecelakaan,
    -- The empty window spans every group, giving the grand total on each row.
    SUM(COUNT(*)) OVER all_groups AS kecelakan_keseluruhan,
    (COUNT(*) * 100.0 / SUM(COUNT(*)) OVER all_groups) AS persentase
FROM crash_clean
WHERE land_use_name <> 'Trafficway Not in State Inventory'
GROUP BY land_use_name
WINDOW all_groups AS ();
***