You are on page 1of 7

ASSIGNMENT NO.

6
>airquality
>airquality->air
>dim(air)
153 6

>head(air)
Ozone Solar.R Wind Temp Month Day
1 41 190 7.4 67 5 1
2 36 118 8.0 72 5 2
3 12 149 12.6 74 5 3
4 18 313 11.5 62 5 4
5 NA NA 14.3 56 5 5
6 28 NA 14.9 66 5 6

>summary(air)
Ozone Solar.R Wind Temp Month Day
Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00 Min. :5.000 Min. : 1.0
1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00 1st Qu.:6.000 1st Qu.: 8.0
Median : 31.50 Median :205.0 Median : 9.700 Median :79.00 Median :7.000 Median :16.0
Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88 Mean :6.993 Mean :15.8
3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00 3rd Qu.:8.000 3rd Qu.:23.0
Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00 Max. :9.000 Max. :31.0
NA's :37 NA's :7

>mean(air$Ozone)
NA

>mean(air$Ozone,na.rm=TRUE)
42.12931

>mean(air$Solar.R)
NA

>mean(air$Solar.R,na.rm=TRUE)
185.9315

>max(air$Solar.R)
NA

>max(air$Solar.R,na.rm=TRUE)
334
>summary(air)
Ozone Solar.R Wind Temp Month Day
Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00 Min. :5.000 Min. : 1.0
1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00 1st Qu.:6.000 1st Qu.: 8.0
Median : 31.50 Median :205.0 Median : 9.700 Median :79.00 Median :7.000 Median :16.0
Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88 Mean :6.993 Mean :15.8
3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00 3rd Qu.:8.000 3rd Qu.:23.0
Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00 Max. :9.000 Max. :31.0
NA's :37 NA's :7
********************************************************************************

#data cleaning
# Replace every missing Ozone reading with the median of the observed readings.
air$Ozone <- replace(
  air$Ozone,
  is.na(air$Ozone),
  median(air$Ozone, na.rm = TRUE)
)
>summary(air)
Ozone Solar.R Wind Temp Month Day
Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00 Min. :5.000 Min. : 1.0
1st Qu.: 21.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00 1st Qu.:6.000 1st Qu.: 8.0
Median : 31.50 Median :205.0 Median : 9.700 Median :79.00 Median :7.000 Median :16.0
Mean : 39.56 Mean :185.9 Mean : 9.958 Mean :77.88 Mean :6.993 Mean :15.8
3rd Qu.: 46.00 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00 3rd Qu.:8.000 3rd Qu.:23.0
Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00 Max. :9.000 Max. :31.0
NA's :7

# Replace every missing Solar.R reading with the median of the observed readings.
air$Solar.R <- replace(
  air$Solar.R,
  is.na(air$Solar.R),
  median(air$Solar.R, na.rm = TRUE)
)
>summary(air)
Ozone Solar.R Wind Temp Month Day
Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00 Min. :5.000 Min. : 1.0
1st Qu.: 21.00 1st Qu.:120.0 1st Qu.: 7.400 1st Qu.:72.00 1st Qu.:6.000 1st Qu.: 8.0
Median : 31.50 Median :205.0 Median : 9.700 Median :79.00 Median :7.000 Median :16.0
Mean : 39.56 Mean :186.8 Mean : 9.958 Mean :77.88 Mean :6.993 Mean :15.8
3rd Qu.: 46.00 3rd Qu.:256.0 3rd Qu.:11.500 3rd Qu.:85.00 3rd Qu.:8.000 3rd Qu.:23.0
Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00 Max. :9.000 Max. :31.0

************************************************************************************************
# data Transformation
>head(air)
Ozone Solar.R Wind Temp Month Day
1 41.0 190 7.4 67 5 1
2 36.0 118 8.0 72 5 2
3 12.0 149 12.6 74 5 3
4 18.0 313 11.5 62 5 4
5 31.5 205 14.3 56 5 5
6 28.0 205 14.9 66 5 6

# Derived logical column: TRUE where the Solar.R reading is above 100.
air[["Solar.Danger"]] <- air[["Solar.R"]] > 100
>head(air)
Ozone Solar.R Wind Temp Month Day Solar.Danger
1 41.0 190 7.4 67 5 1 TRUE
2 36.0 118 8.0 72 5 2 TRUE
3 12.0 149 12.6 74 5 3 TRUE
4 18.0 313 11.5 62 5 4 TRUE
5 31.5 205 14.3 56 5 5 TRUE
6 28.0 205 14.9 66 5 6 TRUE

# Bin Solar.R into 50-unit intervals covering 0 to 350. include.lowest closes
# the first interval on the left so a reading of exactly 0 is not dropped.
brks <- seq(from = 0, to = 350, by = 50)
air$Solar.R <- cut(air$Solar.R, breaks = brks, include.lowest = TRUE)
>head(air)
Ozone Solar.R Wind Temp Month Day Solar.Danger
1 41.0 (150,200] 7.4 67 5 1 TRUE
2 36.0 (100,150] 8.0 72 5 2 TRUE
3 12.0 (100,150] 12.6 74 5 3 TRUE
4 18.0 (300,350] 11.5 62 5 4 TRUE
5 31.5 (200,250] 14.3 56 5 5 TRUE
6 28.0 (200,250] 14.9 66 5 6 TRUE

# Work on a copy so `air` keeps its numeric Month column.
air1 <- air

# Relabel month numbers with month names using an exact-match lookup.
# gsub() does substring regex replacement, so chaining it over digits would
# also rewrite digits inside larger numbers (15 would become "1May").
# Values without an entry in the lookup are kept unchanged, as text.
month_codes <- c(5, 6, 7, 8, 9)
month_names <- c("May", "June", "July", "August", "Sept")
month_hit <- match(air1$Month, month_codes)
air1$Month <- ifelse(
  is.na(month_hit),
  as.character(air1$Month),
  month_names[month_hit]
)
>head(air1)
Ozone Solar.R Wind Temp Month Day Solar.Danger
1 41.0 (150,200] 7.4 67 May 1 TRUE
2 36.0 (100,150] 8.0 72 May 2 TRUE
3 12.0 (100,150] 12.6 74 May 3 TRUE
4 18.0 (300,350] 11.5 62 May 4 TRUE
5 31.5 (200,250] 14.3 56 May 5 TRUE
6 28.0 (200,250] 14.9 66 May 6 TRUE
********************************************************************************

#Data Integration
>airquality->air
>summary(air)
Ozone Solar.R Wind Temp Month Day
Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00 Min. :5.000 Min. : 1.0
1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00 1st Qu.:6.000 1st Qu.: 8.0
Median : 31.50 Median :205.0 Median : 9.700 Median :79.00 Median :7.000 Median :16.0
Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88 Mean :6.993 Mean :15.8
3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00 3rd Qu.:8.000 3rd Qu.:23.0
Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00 Max. :9.000 Max. :31.0
NA's :37 NA's :7
>min=1
>max=334
>head(air)
Ozone Solar.R Wind Temp Month Day
1 41 190 7.4 67 5 1
2 36 118 8.0 72 5 2
3 12 149 12.6 74 5 3
4 18 313 11.5 62 5 4
5 NA NA 14.3 56 5 5
6 28 NA 14.9 66 5 6

>dim(air)
153 6

# Fill each missing Ozone / Solar.R reading with the mean of the observed
# readings from the same Month.
# ave() returns, for every row, the mean of that row's Month group, so the
# gaps can be filled in one vectorised assignment per column instead of
# recomputing the group mean inside a row-by-row loop.
for (col in c("Ozone", "Solar.R")) {
  month_mean <- ave(
    air[[col]], air[["Month"]],
    FUN = function(v) mean(v, na.rm = TRUE)
  )
  gaps <- is.na(air[[col]])
  air[gaps, col] <- month_mean[gaps]
}

# Min-max rescale `x` so its smallest value maps to 0 and its largest to 1.
#
# x: numeric vector. A data frame is also accepted, but then a single
#    min/max pooled over all columns is used; apply the function per column
#    for column-wise scaling.
# Returns an object shaped like `x`.
#   - NA entries stay NA and are ignored when finding the min and max.
#   - Empty or all-NA input is returned unchanged.
#   - Constant input (max == min) returns zeros instead of dividing by zero.
normalize <- function(x) {
  if (length(x) == 0L || all(is.na(x))) {
    return(x)
  }
  bounds <- range(x, na.rm = TRUE)
  span <- bounds[2] - bounds[1]
  # Exact comparison is intended: this only guards the division below.
  if (isTRUE(span == 0)) {
    return(x * 0)
  }
  (x - bounds[1]) / span
}

# Rescale every column to [0, 1] using that column's own min and max.
# Passing the whole data frame to normalize() pools the min and max over all
# columns (1 and 334 here), which is why the output recorded below shows Ozone
# peaking at 0.50150 and Wind at 0.059159 instead of 1.
# NOTE(review): the summary/str/head output and the regression coefficients
# recorded after this point came from the pooled call and will change on rerun.
air[] <- lapply(air, normalize)
>summary(air)
Ozone Solar.R Wind Temp Month Day
Min. :0.00000 Min. :0.01802 Min. :0.002102 Min. :0.1652 Min. :0.01201 Min. :0.00000
1st Qu.:0.06006 1st Qu.:0.35736 1st Qu.:0.019219 1st Qu.:0.2132 1st Qu.:0.01502 1st Qu.:0.02102
Median :0.08542 Median :0.57958 Median :0.026126 Median :0.2342 Median :0.01802 Median :0.04505
Mean :0.11967 Mean :0.55416 Mean :0.026899 Mean :0.2309 Mean :0.01800 Mean :0.04446
3rd Qu.:0.17452 3rd Qu.:0.76577 3rd Qu.:0.031532 3rd Qu.:0.2523 3rd Qu.:0.02102 3rd Qu.:0.06607
Max. :0.50150 Max. :1.00000 Max. :0.059159 Max. :0.2883 Max. :0.02402 Max. :0.09009

>str(air)
'data.frame': 153 obs. of 6 variables:
$ Ozone : num 0.1201 0.1051 0.033 0.0511 0.0679 ...
$ Solar.R: num 0.568 0.351 0.444 0.937 0.541 ...
$ Wind : num 0.0192 0.021 0.0348 0.0315 0.0399 ...
$ Temp : num 0.198 0.213 0.219 0.183 0.165 ...
$ Month : num 0.012 0.012 0.012 0.012 0.012 ...
$ Day : num 0 0.003 0.00601 0.00901 0.01201 ...

>head(air)
Ozone Solar.R Wind Temp Month Day
1 0.12012012 0.5675676 0.01921922 0.1981982 0.01201201 0.000000000
2 0.10510511 0.3513514 0.02102102 0.2132132 0.01201201 0.003003003
3 0.03303303 0.4444444 0.03483483 0.2192192 0.01201201 0.006006006
4 0.05105105 0.9369369 0.03153153 0.1831832 0.01201201 0.009009009
5 0.06791407 0.5414303 0.03993994 0.1651652 0.01201201 0.012012012
6 0.08108108 0.5414303 0.04174174 0.1951952 0.01201201 0.015015015

********************************************************************************
#3. Apply linear regression algorithm using Least Squares Method on "Ozone" and "Solar.R"
# Least-squares fit with Ozone as the response and Solar.R as the predictor.
# The model terms are named Y and X, so new data passed to predict() must
# supply a column called X.
Y <- air[["Ozone"]]
X <- air[["Solar.R"]]
lMod <- lm(Y ~ X)
>lMod

Call:
lm(formula = Y ~ X)

Coefficients:
(Intercept) X
0.06509 0.09849

>plot(Y~X)
>abline(lMod,col="black",lwd=3)
********************************************************************************

#4. Apply linear regression algorithm using Least Squares Method on "Ozone" and "Wind"
# Least-squares fit with Ozone as the response and Wind as the predictor.
# This reassigns the same Y and X variables used for the Solar.R fit.
Y <- air[["Ozone"]]
X <- air[["Wind"]]
lMod2 <- lm(Y ~ X)
>lMod2
Call:
lm(formula = Y ~ X)

Coefficients:
(Intercept) X
0.2364 -4.3410

>plot(Y~X)
>abline(lMod2,col="blue",lwd=3)
********************************************************************************

#Perform Prediction

# Prediction of 'Ozone' when 'Solar.R' = 10
# Uses lMod, the Ozone ~ Solar.R fit. lMod2 is the Ozone ~ Wind fit, so
# predicting from it would treat the 10 as a Wind value.
# NOTE(review): X is read on the same scale as the column the model was fit
# on (the normalized data), so confirm 10 is meant on that scale; a raw
# reading would have to be rescaled first and the result back-transformed.
p1 <- predict(lMod, data.frame(X = 10))
p1
# Output recorded from the earlier lMod2 call, kept for reference:
#         1
# -43.17343

# Prediction of 'Ozone' when 'Solar.R' = 5
# Uses lMod, the Ozone ~ Solar.R fit. lMod2 is the Ozone ~ Wind fit, so
# predicting from it would treat the 5 as a Wind value.
# NOTE(review): X is read on the same scale as the column the model was fit
# on (the normalized data), so confirm 5 is meant on that scale; a raw
# reading would have to be rescaled first and the result back-transformed.
p2 <- predict(lMod, data.frame(X = 5))
p2
# Output recorded from the earlier lMod2 call, kept for reference:
#         1
# -21.46849

You might also like