You are on page 1of 5

Machine Learning pipeline

#standardsql
-- Train a linear regression model that predicts ARRIVAL_DELAY.
-- Rows with a NULL ARRIVAL_DELAY are excluded from the training data.
CREATE OR REPLACE MODEL `flight.arrdelay`  -- model name
OPTIONS
(model_type='linear_reg', input_label_cols=['ARRIVAL_DELAY']) AS  -- dependent variable
SELECT
    ARRIVAL_DELAY,        -- label column (named in input_label_cols)
    -- Independent variables: every remaining column is used as a feature.
    AIRLINE,
    ORIGIN_AIRPORT,
    DESTINATION_AIRPORT,
    DEPARTURE_DELAY,
    TAXI_OUT,
    DISTANCE
FROM
    `big-query-291822.flight.flights`
WHERE
    ARRIVAL_DELAY IS NOT NULL;
-- Machine Learning pipeline
-- • Use your Linear Regression Model to predict:
--   (1) all variables of the model, limit 10
--   (2) flights delayed when the origin is ORD
--       >> 'ORD' AS ORIGIN_AIRPORT
--   (3) flights delayed from Dallas Fort Worth to Los Angeles
--       >> 'DFW' AS ORIGIN_AIRPORT
--          'LAX' AS DESTINATION_AIRPORT
#standardsql
-- Query for task (1): score 10 rows with the `flight.arrdelay` model.
-- For tasks (2) and (3), replace the airport columns in the inner SELECT
-- with the literal expressions listed above.
-- There is no ORDER BY, so which 10 rows are returned is not guaranteed.
SELECT * FROM ML.PREDICT(MODEL `flight.arrdelay`,
(
    SELECT
        ARRIVAL_DELAY,
        AIRLINE,
        ORIGIN_AIRPORT,
        DESTINATION_AIRPORT,
        DEPARTURE_DELAY,
        TAXI_OUT,
        DISTANCE
    FROM
        `flight.flights`
    WHERE
        ARRIVAL_DELAY IS NOT NULL
    LIMIT 10));
Create a logistic regression model to predict whether a flight is on time or
delayed, based on carrier, origin, dest, dep_delay, taxi_out, and distance.

建模
#standardsql
-- Train a logistic regression classifier whose label is ON_TIME.
-- ON_TIME is 1 when ARRIVAL_DELAY is at most 15 and 0 otherwise; rows with
-- a NULL ARRIVAL_DELAY never reach the model.
CREATE OR REPLACE MODEL `flight.ontime`
OPTIONS (
    model_type = 'logistic_reg',
    input_label_cols = ['ON_TIME']
) AS
SELECT
    CASE
        WHEN f.ARRIVAL_DELAY <= 15 THEN 1
        ELSE 0
    END AS ON_TIME,
    f.AIRLINE,
    f.ORIGIN_AIRPORT,
    f.DESTINATION_AIRPORT,
    f.DEPARTURE_DELAY,
    f.TAXI_OUT,
    f.DISTANCE
FROM `big-query-291822.flight.flights` AS f
WHERE f.ARRIVAL_DELAY IS NOT NULL
预测
-- • Use your Logistic Regression Model (`flight.ontime`) to predict:
--   (the slide text reads "Linear Regression", but `flight.ontime` is
--    created with model_type='logistic_reg')
--   (1) all variables of the model, limit 10
--   (2) flights delayed when the origin is ORD
--       >> 'ORD' AS ORIGIN_AIRPORT
--   (3) flights delayed from Dallas Fort Worth to Los Angeles
--       >> 'DFW' AS ORIGIN_AIRPORT
--          'LAX' AS DESTINATION_AIRPORT
-- Query for task (1): score 10 rows with the `flight.ontime` model.
-- For tasks (2) and (3), replace the airport columns in the inner SELECT
-- with the literal expressions listed above.
-- There is no ORDER BY, so which 10 rows are returned is not guaranteed.
SELECT * FROM ML.PREDICT(MODEL `flight.ontime`,
(
    SELECT
        ARRIVAL_DELAY,
        AIRLINE,
        ORIGIN_AIRPORT,
        DESTINATION_AIRPORT,
        DEPARTURE_DELAY,
        TAXI_OUT,
        DISTANCE
    FROM
        `flight.flights`
    WHERE
        ARRIVAL_DELAY IS NOT NULL
    LIMIT 10));
-- Query A: number of flights departing ORD with MONTH = 7 whose
-- DEPARTURE_DELAY is greater than 0.
SELECT
    ORIGIN_AIRPORT,
    COUNT(*) AS totaldelay
FROM `big-query-291822.flight.flights`
WHERE ORIGIN_AIRPORT = "ORD" AND MONTH = 7 AND
    DEPARTURE_DELAY > 0
GROUP BY ORIGIN_AIRPORT;

-- Query B: number of flights departing ORD with MONTH = 7 that have a
-- non-NULL DEPARTURE_DELAY. COUNT(column) skips NULLs, and there is no
-- "> 0" filter here, so on-time and early departures are counted too.
SELECT
    COUNT(DEPARTURE_DELAY) AS totaldelay
FROM `big-query-291822.flight.flights`
WHERE ORIGIN_AIRPORT = "ORD" AND MONTH = 7;

-- Mean departure delay and mean arrival delay for each origin airport,
-- listed in ascending order of mean arrival delay.
SELECT
    f.ORIGIN_AIRPORT,
    AVG(f.DEPARTURE_DELAY) AS avg_dep_delay,
    AVG(f.ARRIVAL_DELAY) AS avg_arr_delay
FROM `big-query-291822.flight.flights` AS f
GROUP BY f.ORIGIN_AIRPORT
ORDER BY avg_arr_delay ASC

You might also like