You are on page 1 of 6

Q3.1: Take the last full month of NYISO data (e.g. Feb. 2019 if the assignment is due in
Mar. 2019) and plot the hourly load (you need to average the load for each hour based on the
five-minute periods) for your assigned zone with Excel or dygraphs for the entire
month (24*days) - use appropriate axis labels.
Lin Yuan
Selected zone: MILLWD

(nchar('Lin')+nchar('Yuan')) %% 11
[1] 7
# ---- Q3.1 (a): load NYISO five-minute load ("pal") data into PostgreSQL ----
# For every month in `months`, download the zipped archive from `load_url`,
# unpack its CSV files into `out_path`, and insert each CSV row into the
# `load` table through `conn`. `conn` is left open for the queries that follow.
library(RPostgreSQL)

load_url <- 'http://mis.nyiso.com/public/csv/pal/'
run.monthly <- TRUE # set to FALSE to skip the download-and-insert step
out_path <- 'C:/Temp/NYISO'
months <- seq(as.Date("2021-06-01"), by = "month", length.out = 1)
months <- rev(months)
# One archive per month: <load_url><yyyymmdd>pal_csv.zip
zipped_load_urls <- paste0(load_url, gsub('-', '', months), 'pal_csv.zip')

# `pg` was passed to dbConnect() without being defined anywhere in this
# script, so the driver object is created explicitly here.
pg <- dbDriver("PostgreSQL")
# NOTE(review): credentials are hard-coded; consider reading them from
# environment variables before sharing this script.
conn <- dbConnect(
  drv = pg,
  user = "electricitymarketwriter",
  password = "write123",
  host = "localhost",
  port = 5432,
  dbname = "electricitymarket"
)

# Render values as SQL string literals, doubling embedded single quotes so a
# value containing an apostrophe cannot terminate the literal early.
sql_quote <- function(x) {
  paste0("'", gsub("'", "''", as.character(x), fixed = TRUE), "'")
}

if (run.monthly) {
  dir.create(out_path, showWarnings = FALSE, recursive = TRUE)

  for (zipped_url in zipped_load_urls) {
    temp_file <- file.path(out_path, "temp.zip")
    # Binary mode keeps the archive intact on Windows.
    download.file(zipped_url, temp_file, mode = "wb")
    unzip(zipfile = temp_file, exdir = out_path) # extract from archive
    file.remove(temp_file)

    # Only pick up CSV files: everything listed here is read and then deleted.
    csvs <- rev(list.files(
      out_path,
      pattern = "\\.csv$", ignore.case = TRUE, full.names = TRUE
    ))

    for (csv in csvs) {
      load_csv <- na.omit(read.csv(file = csv))
      # Disabled timestamp normalisation, kept for reference:
      # load_csv$Time.Stamp <- as.character(
      #   as.POSIXct(load_csv$Time.Stamp, format = "%m/%d/%Y %H:%M:%S"))
      cat("Processing", csv, "...")
      t0 <- Sys.time()

      # paste0() turns zero-length columns into "", so guard the empty case
      # instead of generating one malformed INSERT.
      if (nrow(load_csv) > 0) {
        # Build all statements at once (paste0 is vectorised over rows).
        # ON CONFLICT ... DO NOTHING makes re-runs skip rows already loaded;
        # it needs a unique constraint/index on (time_stamp, time_zone, ptid)
        # in the `load` table - TODO confirm against the schema.
        stmts <- paste0(
          'INSERT INTO load VALUES (',
          sql_quote(load_csv$Time.Stamp), ",",
          sql_quote(load_csv$Time.Zone), ",",
          sql_quote(load_csv$Name), ",",
          load_csv$PTID, ",",
          load_csv$Load,
          ") ON CONFLICT (time_stamp, time_zone, ptid) DO NOTHING;"
        )
        for (stm in stmts) {
          # Clear each result set so they do not pile up on the connection.
          dbClearResult(dbSendQuery(conn, stm))
        }
      }

      t1 <- Sys.time()
      file.remove(csv)
      # Force seconds: a bare difftime may silently switch to minutes.
      cat('done after', as.numeric(difftime(t1, t0, units = "secs")), 's.\n')
    } # of for(csv)
  } # of for(url)
}
# ---- Q3.1 (b): hourly average load for zone MILLWD, plotted with dygraphs ----
# Queries the `load` table through `conn`, averages the summed load per
# timestamp within each hour, and leaves the result in `hrly_load`
# (data frame, row names = hour) and `hrly_load.xts` (xts series).
library(xts)
library(dygraphs)

from_dt <- '2021-05-01 00:00:00'
to_dt <- '2021-05-31 23:59:59'
qry <- paste0(
  "SELECT date_trunc('hour',time_stamp) as ymdh, AVG(total_load) as avg_load\n",
  "FROM\n",
  "(SELECT time_stamp, time_zone, SUM(load) as total_load\n",
  "FROM load",
  " WHERE time_stamp BETWEEN '", from_dt, "' AND '", to_dt,
  "' AND node_name =\n'MILLWD'",
  " GROUP BY time_stamp, time_zone) TL\n",
  "GROUP BY date_trunc('hour',time_stamp)\n",
  "ORDER BY ymdh;"
)
hrly_load <- dbGetQuery(conn, qry)

# Row names carry the timestamps that as.xts() parses back. Format them
# explicitly: implicit as.character() on date-times drops the time part of
# midnight values on R >= 4.3, which makes the whole vector parse as
# date-only and collapses every hour of a day onto the same index.
hrly_stamp <- hrly_load$ymdh
if (inherits(hrly_stamp, "POSIXt")) {
  hrly_stamp <- format(hrly_stamp, "%Y-%m-%d %H:%M:%S")
}
rownames(hrly_load) <- hrly_stamp
hrly_load$ymdh <- NULL

hrly_load.xts <- as.xts(hrly_load)

# The assignment asks for labelled axes.
# NOTE(review): unit assumed to be MW, as published by NYISO - confirm.
dygraph(
  hrly_load.xts,
  main = "Hourly average load - zone MILLWD",
  xlab = "Hour",
  ylab = "Average load (MW)"
)

Q3.2: Using the data from Q3.1, create a 24 hours-ahead load forecast and show the
forecast plot generated in R (with prediction intervals) – you may use the standard
“ancient” R plot for this one
# ---- Q3.2: 24-hours-ahead load forecast with prediction intervals ----
# Fits an ARIMA model to `hrly_load.xts`, forecasts the next 24 hours
# (`fcst`), and plots the forecast with base graphics and with dygraphs.
library(forecast)
library(dygraphs)

aa <- auto.arima(hrly_load.xts, stepwise = FALSE)
summary(aa)
fcst <- forecast(aa, 24)
plot(fcst) # base-R plot with prediction intervals

# 95% band plus point forecast, indexed by the forecast hours.
fcst.95pct <- as.data.frame(fcst)[, c('Lo 95', 'Point Forecast', 'Hi 95')]
last.date <- tail(rownames(hrly_load), 1)
# Forecast hours start one hour after the last fitted observation; the count
# follows the forecast itself rather than a second hard-coded 24.
fcst.hours <- seq(
  from = as.POSIXct(last.date, tz = "America/New_York") + 3600,
  by = "hour",
  length.out = nrow(fcst.95pct)
)
# Format explicitly: implicit as.character() on date-times drops the time
# part of midnight values on R >= 4.3 (the first forecast hour is midnight),
# after which the row names would all parse as date-only.
rownames(fcst.95pct) <- format(fcst.hours, "%Y-%m-%d %H:%M:%S")

dygraph(fcst.95pct) %>%
  dySeries(c('Lo 95', 'Point Forecast', 'Hi 95'))

# Give the history a zero-width "band" (lower = upper = actual) so it can be
# stacked with the forecast and drawn as a single banded series.
fake.fcst.95pct <- as.data.frame(hrly_load)
fake.fcst.95pct$`Lo 95` <- hrly_load$avg_load
fake.fcst.95pct$`Hi 95` <- hrly_load$avg_load
fake.fcst.95pct <- fake.fcst.95pct[, c('Lo 95', 'avg_load', 'Hi 95')]
# rbind() needs matching column names in both frames.
colnames(fcst.95pct) <- c('Lo 95', 'avg_load', 'Hi 95')

dygraph(rbind(fake.fcst.95pct, fcst.95pct)) %>%
  dySeries(c('Lo 95', 'avg_load', 'Hi 95'))
Q3.3: Calculate the MAPE of your forecast using historical data for the first day (24
hours) of the month following the month you used to fit the model
MAPE=0.1110762
# ---- Q3.3: forecast accuracy (MAE / MAPE) on the following day ----
# Pulls the actual hourly load for the 24 hours after `to_dt`, lines it up
# with the point forecast in `fcst$mean`, and computes the error measures.
# Results: `accuracy` (per-hour table), `mae`, `mape` (a fraction, not a %).
stamp_fmt <- "%Y-%m-%d %H:%M:%S"
# Explicit formatting keeps the full timestamp even when it falls on midnight.
new_from_dt <- format(as.POSIXct(to_dt) + 1, stamp_fmt)
new_to_dt <- format(as.POSIXct(to_dt) + 60 * 60 * 24, stamp_fmt)
new_qry <- paste0(
  "SELECT date_trunc('hour',time_stamp) as ymdh, AVG(total_load) as avg_load\n",
  "FROM\n",
  "(SELECT time_stamp, time_zone, SUM(load) as total_load\n",
  "FROM load",
  " WHERE time_stamp BETWEEN '", new_from_dt, "' AND '", new_to_dt,
  "' AND node_name =\n'MILLWD'",
  " GROUP BY time_stamp, time_zone) TL\n",
  "GROUP BY date_trunc('hour',time_stamp)\n",
  "ORDER BY ymdh;"
)
new_hrly_load <- dbGetQuery(conn, new_qry) # retrieve data
if (nrow(new_hrly_load) == 0) {
  stop(
    "No load data found between ", new_from_dt, " and ", new_to_dt,
    "; load that day into the database first.",
    call. = FALSE
  )
}

# Format row names explicitly: the first row is midnight, whose time part
# implicit as.character() drops on R >= 4.3, which breaks later parsing of
# the row names as timestamps.
new_stamp <- new_hrly_load$ymdh
if (inherits(new_stamp, "POSIXt")) {
  new_stamp <- format(new_stamp, stamp_fmt)
}
rownames(new_hrly_load) <- new_stamp
new_hrly_load$ymdh <- NULL

accuracy <- new_hrly_load
# `mean` is the point forecast; take as many steps as there are actual hours.
accuracy$fcst <- fcst$mean[seq_len(nrow(accuracy))]
head(accuracy)
accuracy$E <- accuracy$avg_load - accuracy$fcst # error
accuracy$AE <- abs(accuracy$E) # absolute error
mae <- mean(accuracy$AE)
accuracy$PE <- accuracy$E / accuracy$avg_load # error relative to actual
accuracy$APE <- abs(accuracy$PE)
mape <- mean(accuracy$APE)

Q3.4: Use Excel or dygraphs to show the historical and predicted load for the first day of
the month following the month you used to fit the model (just the 24 hours)
# Q3.4: actual vs. forecast load for the 24 test hours, drawn on one chart
# with the legend in the bottom-right corner.
require(PerformanceAnalytics)
chart.TimeSeries(
  R = accuracy[c('avg_load', 'fcst')],
  legend.loc = 'bottomright'
)

You might also like