You are on page 1 of 5

Kumpulan Query PA 1 Batch 13 Kelompok 5

A. Data Cleansing:

-- Build crash_clean from crash:
--   1. add converted_timestamp, the crash timestamp shifted into one of six
--      US time zones chosen from state_name;
--   2. keep only rows whose converted_timestamp falls in 2021;
--   3. drop rows carrying placeholder values in the columns used for analysis.
--
-- Each state is assigned a single zone, so states that straddle two zones are
-- only approximated.
-- NOTE(review): Tennessee, Kentucky and Idaho are split states and are left in
-- the zones the original query used -- confirm this is what the analysis wants.
-- NOTE(review): if this runs on PostgreSQL, abbreviations such as 'EST' denote
-- fixed UTC offsets, so daylight saving time is not applied -- confirm that is
-- acceptable for the hour-of-day analysis.
CREATE TABLE crash_clean AS
WITH crash_with_local_time AS (
    SELECT
        crash.*,
        CASE
            -- Eastern
            WHEN state_name IN (
                'Connecticut', 'Delaware', 'Georgia', 'Maine', 'Maryland',
                'Massachusetts', 'Michigan', 'New Hampshire', 'New Jersey',
                'New York', 'North Carolina', 'Ohio', 'Pennsylvania',
                'Rhode Island', 'South Carolina', 'Vermont', 'Virginia',
                'West Virginia', 'District of Columbia', 'Florida', 'Indiana',
                'Tennessee'
            )
                THEN timestamp_of_crash AT TIME ZONE 'EST'
            -- Central. 'Michigan' is listed only under Eastern: a second entry
            -- here could never match because the first WHEN wins. Texas, Kansas
            -- and North Dakota lie almost entirely in the Central zone.
            WHEN state_name IN (
                'Alabama', 'Arkansas', 'Illinois', 'Iowa', 'Louisiana',
                'Minnesota', 'Mississippi', 'Missouri', 'Nebraska', 'Oklahoma',
                'Wisconsin', 'Kentucky', 'South Dakota',
                'Texas', 'Kansas', 'North Dakota'
            )
                THEN timestamp_of_crash AT TIME ZONE 'CST'
            -- Mountain
            WHEN state_name IN (
                'Arizona', 'Colorado', 'Montana', 'New Mexico', 'Utah', 'Wyoming'
            )
                THEN timestamp_of_crash AT TIME ZONE 'MST'
            -- Pacific
            WHEN state_name IN (
                'California', 'Nevada', 'Washington', 'Idaho', 'Oregon'
            )
                THEN timestamp_of_crash AT TIME ZONE 'PST'
            -- Alaska. 'Alaska' is the value that must be matched here; the
            -- region names are kept only for backward compatibility.
            WHEN state_name IN (
                'Alaska', 'Main Part of Alaska', 'Anchorage', 'Juneau', 'Nome'
            )
                THEN timestamp_of_crash AT TIME ZONE 'AKST'
            -- Hawaii
            WHEN state_name IN ('Hawaii')
                THEN timestamp_of_crash AT TIME ZONE 'HST'
            -- No ELSE: an unlisted state_name yields NULL, and the year filter
            -- below then removes the row.
        END AS converted_timestamp
    FROM crash
),
timezone_in_usa AS (
    SELECT *
    FROM crash_with_local_time
    -- Compare against a number, not the string '2021'.
    WHERE EXTRACT(YEAR FROM converted_timestamp) = 2021
)
SELECT *
FROM timezone_in_usa
-- Rows where any of these columns is NULL are dropped as well, because the
-- comparisons below evaluate to unknown for NULL.
WHERE milepoint NOT IN (99998, 99999)
  AND city_name NOT IN ('NOT APPLICABLE', 'Not Reported', 'Unknown')
  AND land_use_name NOT IN ('Not Reported', 'Unknown')
  AND functional_system_name NOT IN ('Unknown', 'Not Reported')
  AND manner_of_collision_name NOT IN ('Not Reported', 'Reported as Unknown')
  AND type_of_intersection_name NOT IN ('Unknown', 'Not Reported')
  AND light_condition_name NOT IN ('Not Reported', 'Reported as Unknown')
  AND atmospheric_conditions_1_name NOT IN ('Not Reported', 'Reported as Unknown');

***

B. Query Pembahasan Soal

1. Ada 4 Query

Nomor 1. Kondisi yang memperbesar risiko kecelakaan

1a. Faktor Cuaca (atmospheric conditions)

Query:
-- Five most frequent atmospheric conditions in crash, ignoring rows reported
-- as 'Not Reported'. One output row per condition with its row count.
SELECT
    atmospheric_conditions_1_name,
    COUNT(*)
FROM crash
WHERE atmospheric_conditions_1_name <> 'Not Reported'
GROUP BY atmospheric_conditions_1_name
ORDER BY COUNT(*) DESC
LIMIT 5;

1b. Faktor persimpangan jalan (type of intersection)

Query:
-- Five most frequent intersection types in crash, ignoring the two
-- placeholder values. One output row per intersection type with its row count.
SELECT
    type_of_intersection_name,
    COUNT(*)
FROM crash
WHERE type_of_intersection_name NOT IN ('Not Reported', 'Reported as Unknown')
GROUP BY type_of_intersection_name
ORDER BY COUNT(*) DESC
LIMIT 5;

1c. Faktor kondisi cahaya (light conditions)

Query:
-- Five most frequent light conditions in crash (no values are excluded).
-- One output row per light condition with its row count.
SELECT
    light_condition_name,
    COUNT(*)
FROM crash
GROUP BY light_condition_name
ORDER BY COUNT(*) DESC
LIMIT 5;

1d. Faktor pengendara mabuk (number of drunk drivers)

Query Utama 1d:


-- Five most frequent values of number_of_drunk_drivers in crash.
-- One output row per value with its row count.
SELECT
    number_of_drunk_drivers,
    COUNT(*)
FROM crash
GROUP BY number_of_drunk_drivers
ORDER BY COUNT(*) DESC
LIMIT 5;

Tambahan di 1d.

Query kasus kecelakaan dengan pengendara mabuk

-- Number of crash_clean rows with at least one drunk driver.
-- The WHERE clause already excludes NULL number_of_drunk_drivers, so counting
-- rows gives the same figure as counting that column.
SELECT COUNT(*) AS total_kecelakaan_per_drunk_drivers
FROM crash_clean
WHERE number_of_drunk_drivers > 0;

Query kasus kecelakaan dengan pengendara tidak mabuk

-- Number of crash_clean rows with no drunk driver, counted over the non-NULL
-- values of consecutive_number.
SELECT
    COUNT(consecutive_number) AS jumlah_kecelakaan_non_drunk_drivers
FROM crash_clean
WHERE number_of_drunk_drivers = 0;

***

Nomor 2. 10 negara bagian teratas dengan angka kecelakaan tertinggi

Query:
-- Ten states with the most rows in crash_clean, counted over the non-NULL
-- values of consecutive_number, highest count first.
SELECT
    state_name,
    COUNT(consecutive_number)
FROM crash_clean
GROUP BY state_name
ORDER BY COUNT(consecutive_number) DESC
LIMIT 10;

***

Nomor 3. Rata-rata kecelakaan per hari berdasarkan jam

Query:

-- Average number of crashes per day for each hour of the day.
-- The count is divided by 365.0 rather than 365: COUNT returns an integer, and
-- integer division would truncate the average to a whole number.
-- 365 is the number of days in 2021, the only year kept in crash_clean.
SELECT
    EXTRACT(HOUR FROM converted_timestamp) AS jam_kecelakaan,
    COUNT(consecutive_number) / 365.0 AS rerata_kecelakaan_per_jam
FROM crash_clean
GROUP BY EXTRACT(HOUR FROM converted_timestamp)
ORDER BY jam_kecelakaan ASC;
***

4. Persentase kecelakaan akibat pengemudi mabuk

Query:
-- Question 4
-- Percentage of crashes that involved at least one drunk driver.
--   total_kecelakaan : all rows in crash_clean
--   drunk_driver     : rows with number_of_drunk_drivers > 0
--   persentase_mabuk : drunk_driver as a percentage of the rows whose
--                      number_of_drunk_drivers is not NULL
-- NULLIF turns a zero denominator into NULL, so an empty table returns NULL
-- and does not raise a division-by-zero error.
SELECT
    COUNT(*) AS total_kecelakaan,
    COUNT(CASE WHEN number_of_drunk_drivers > 0 THEN 1 END) AS drunk_driver,
    COUNT(CASE WHEN number_of_drunk_drivers > 0 THEN 1 END) * 100.0
        / NULLIF(COUNT(number_of_drunk_drivers), 0) AS persentase_mabuk
FROM crash_clean;

***

5. Persentase kecelakaan di area Pedesaan dan Perkotaan

Query:
-- Share of crashes per land-use category (e.g. rural vs. urban), excluding
-- 'Trafficway Not in State Inventory'.
-- Step 1 counts crashes per category; step 2 compares each count with the
-- total over all remaining categories.
WITH kecelakaan_per_area AS (
    SELECT
        land_use_name,
        COUNT(*) AS jumlah_kecelakaan
    FROM crash_clean
    WHERE land_use_name <> 'Trafficway Not in State Inventory'
    GROUP BY land_use_name
)
SELECT
    land_use_name,
    jumlah_kecelakaan,
    -- An empty OVER () makes the window span every row of the result.
    SUM(jumlah_kecelakaan) OVER () AS kecelakan_keseluruhan,
    jumlah_kecelakaan * 100.0 / SUM(jumlah_kecelakaan) OVER () AS persentase
FROM kecelakaan_per_area;
***

6. Angka kecelakaan berdasarkan hari kecelakaan terjadi

Query (sudah sekalian pengurutan) :


-- Number of crashes per day of the week, most frequent day first.
-- nama_hari is formatted as '<day number>-<day name>'.
-- NOTE(review): assuming PostgreSQL to_char, D numbers the days 1-7 starting
-- on Sunday, and plain 'Day' is blank-padded to 9 characters. The FM prefix
-- removes that padding so labels read '2-Monday' rather than '2-Monday   '.
SELECT
    TO_CHAR(converted_timestamp, 'D-FMDay') AS nama_hari,
    COUNT(*) AS jumlah_kecelakaan_per_hari
FROM crash_clean
GROUP BY TO_CHAR(converted_timestamp, 'D-FMDay')
-- nama_hari breaks ties so the row order is deterministic.
ORDER BY jumlah_kecelakaan_per_hari DESC, nama_hari;

You might also like