# You are on page 1 of 20

# PROIECT ANALIZA DATELOR

# OSICIANU CLAUDIU ADRIAN
# GRUPA 1060

# Load the data set from "date.csv"; the first row holds the column names.
a <- read.csv("date.csv", header = TRUE)
# Print the loaded data frame.
a
# Open the interactive data editor; any edits made there are saved back into `a`.
fix(a)
# NOTE: attach(a) was removed on purpose. Every later statement reads columns
# explicitly as a$<column>, and an attached copy would not follow later changes
# to `a`, so attaching only added a stale, name-masking copy to the search path.
View(a)

# DATA DESCRIPTION
# Dimensions (rows, columns) of the data set.
dim(a)

# Column names.
names(a)

# First five rows.
a[seq_len(5), ]

# Per-column summary statistics.
summary(a)

# Frequency of every value in the id column.
table(a$id)

# Pie charts of the value frequencies for floors and bedrooms.
# Assumes the columns are named exactly "floors" and "bedrooms".
for (col_name in c("floors", "bedrooms")) {
  pie(table(a[[col_name]]))
}

# VARIANCE
# Sample variance of each analysed column, returned as one named vector so that
# every value is labelled with its column. Previously three of these variances
# (floors, grade, yr_renovated) were computed in the middle of the correlation
# section and all results were printed without labels.
variance_cols <- c(
  "price", "bedrooms", "bathrooms", "sqft_living", "sqft_lot", "floors",
  "condition", "grade", "yr_built", "yr_renovated", "zipcode"
)
vapply(a[variance_cols], var, numeric(1))

# Columns paired with `id` in the covariance and correlation calculations.
# NOTE(review): `id` looks like a record identifier; confirm that its
# covariance/correlation with these variables is actually meaningful.
id_pair_cols <- c("price", "bedrooms", "bathrooms", "floors")

# COVARIANCE
# Covariance between id and each paired column (one row, one column per variable).
cov(a$id, a[id_pair_cols])

# CORRELATION
# Pearson correlation between id and each paired column (same layout as above).
cor(a$id, a[id_pair_cols])

# HISTOGRAMS
# One histogram per analysed column. The title and x-axis label are spelled out
# so that each plot is labelled "a$<column>", exactly as a direct
# hist(a$<column>) call labels it.
for (col_name in c(
  "price", "bedrooms", "bathrooms", "sqft_living", "sqft_lot", "floors",
  "condition", "grade", "yr_built", "yr_renovated", "zipcode"
)) {
  axis_label <- paste0("a$", col_name)
  hist(
    a[[col_name]],
    main = paste("Histogram of", axis_label),
    xlab = axis_label
  )
}

# PROBABILITY DENSITY
# Kernel density estimate of each analysed column, one plot per column. The
# title is given explicitly so it matches the one produced by a direct
# plot(density(a$<column>)) call.
for (col_name in c(
  "price", "bedrooms", "bathrooms", "sqft_living", "sqft_lot", "floors",
  "condition", "grade", "yr_built", "yr_renovated", "zipcode"
)) {
  plot(
    density(a[[col_name]]),
    main = paste0("density.default(x = a$", col_name, ")")
  )
}

# SCATTER PLOTS
# id on the x-axis against price, bedrooms, bathrooms and floors in turn.
# The y-axis label is set explicitly to "a$<column>"; the x-axis label is
# derived from the a$id expression itself.
for (col_name in c("price", "bedrooms", "bathrooms", "floors")) {
  plot(a$id, a[[col_name]], ylab = paste0("a$", col_name))
}

# BOXPLOTS
# One box-and-whisker plot per listed column, drawn in the order given.
# sqft_lot is not part of this list.
for (col_name in c(
  "price", "bathrooms", "floors", "bedrooms", "sqft_living",
  "condition", "grade", "yr_built", "yr_renovated", "zipcode"
)) {
  boxplot(a[[col_name]])
}

# SKEWNESS AND KURTOSIS
# skewness() and kurtosis() come from the moments package.
library(moments)

# Columns whose distribution shape is measured.
shape_cols <- c(
  "price", "bedrooms", "bathrooms", "sqft_living", "sqft_lot", "floors",
  "condition", "grade", "yr_built", "yr_renovated", "zipcode"
)

# Each statistic is returned as one named vector, so every value is labelled
# with its column instead of being printed as an anonymous "[1] ..." line.
vapply(a[shape_cols], skewness, numeric(1))

vapply(a[shape_cols], kurtosis, numeric(1))

# JARQUE-BERA NORMALITY TEST
# jarque.bera.test() is provided by the tseries package.
library("tseries")

# Test applied to price, then to floors.
jarque.bera.test(x = a$price)

jarque.bera.test(x = a$floors)

# ANDREWS CURVES
# andrews() is provided by the andrews package; curves are drawn for the whole
# data frame with clr = 5 and ymax = 3.
library("andrews")
andrews(a, ymax = 3, clr = 5)

View(a)
# Keep columns 2 through 12 of `a` as the working data set Y.
Y <- a[, seq(2, 12)]
View(Y)
# CENTERING THE OBSERVATIONS
# Subtract the mean of x from every element of x.
centrare <- function(x) {
  x_mean <- mean(x)
  x - x_mean
}
# Center every column of Y, then draw the scatter-plot matrix of the result.
Y.centrare <- apply(X = Y, MARGIN = 2, FUN = centrare)
pairs(x = Y.centrare)

# STANDARDIZING THE OBSERVATIONS
# Subtract the mean of x from every element, then divide by the standard
# deviation of x.
standardizare <- function(x) {
  deviations <- x - mean(x)
  deviations / sd(x)
}
# Standardize every column of Y, then draw the scatter-plot matrix of the result.
Y.standardizare <- apply(X = Y, MARGIN = 2, FUN = standardizare)
pairs(x = Y.standardizare)

# COVARIANCE MATRIX of the standardized data
Y.cov <- cov(x = Y.standardizare)
View(Y.cov)

# CORRELATION MATRIX of the standardized data
Y.cor <- cor(x = Y.standardizare)
View(Y.cor)

# EIGENVALUES & EIGENVECTORS

# Eigen-decomposition of the covariance matrix of the standardized data.
Y.eigen <- eigen(Y.cov)
Y.eigen

# Share of the eigenvalue total carried by each eigenvalue, rounded to two
# decimals. The shares are computed for all eigenvalues in one vectorized step,
# so the scree plot covers every component (the earlier hand-written version
# stopped at the tenth eigenvalue, although Y is built from 11 columns).
lambda.cov <- round(Y.eigen$values / sum(Y.eigen$values), 2)
lambda.cov

# Scree plot of the eigenvalue shares.
plot(lambda.cov, type = "l", col = "blue", main = "Valorile proprii - Screeplot")

# PRINCIPAL COMPONENTS & SCORE MATRIX

# Eigenvector matrix and standardized data, both as plain matrices.
U <- as.matrix(Y.eigen[["vectors"]])
Y.m <- as.matrix(Y.standardizare)

# Scores: the standardized data multiplied by the eigenvector matrix.
CP <- Y.m %*% U
View(CP)

# Scatter-plot matrix of the scores.
pairs(x = CP)

# Correlation matrix of the scores.
cor(x = CP)

# PRINCIPAL COMPONENTS & SCORE MATRIX - second approach

# Principal component analysis with princomp(), based on the covariance matrix
# (cor = FALSE) and with the scores kept.
pca <- princomp(x = Y.standardizare, cor = FALSE, scores = TRUE)
pca

summary(pca)

# Score matrix stored in the fitted object.
scoruri <- pca[["scores"]]
scoruri

# Correlation matrix and scatter-plot matrix of the scores.
cor(x = scoruri)

pairs(x = scoruri)

# Scree plot of the fitted object, drawn as a line.
plot(pca, type = "l", col = "red", main = "Valorile proprii - matrice covarianta")

# FACTOR MATRIX
# Correlations between the standardized variables and the princomp scores.
omega <- cor(x = Y.standardizare, y = scoruri)
omega

# FACTOR ANALYSIS
# Maximum-likelihood factor analysis with 3 factors, fitted from the covariance
# matrix of the standardized data and left unrotated.
#
# The earlier call was the function's usage signature pasted verbatim: it passed
# the princomp object as `x`, the bare names `subset` and `na.action`, and
# control = "rotate" (control must be a list). None of those took effect
# because the fit uses `covmat`, so they are dropped here. The number of
# observations is now taken from the data instead of the hard-coded value 26.
factanal(
  factors = 3,
  covmat = Y.cov,
  n.obs = nrow(Y.standardizare),
  rotation = "none"
)

# ROTATION
# Varimax rotation of the factor matrix.
varimax(x = omega, eps = 1e-5, normalize = TRUE)

# Promax rotation of the factor matrix with power m = 3.
promax(x = omega, m = 3)

# Build the data frame of numeric variables (columns 2 through 12 of `a`).
Y <- a[, 2:12]

# Label the observations with the Country column.
# The earlier call, rownames(Y, a$Country), only *read* the row names (its
# second argument is `do.NULL`), so no labels were ever assigned. The labels are
# applied only when the column exists and can serve as row names (no missing or
# duplicated values); otherwise the default row names are kept.
# NOTE(review): confirm that "Country" is the intended label column for this
# data set.
row_labels <- a[["Country"]]
if (!is.null(row_labels) && !anyNA(row_labels) && !anyDuplicated(row_labels)) {
  rownames(Y) <- row_labels
}
View(Y)

# Standardize the observations before applying cluster analysis:
# subtract the mean of x from every element, then divide by the standard
# deviation of x.
standardize <- function(x) {
  centered <- x - mean(x)
  centered / sd(x)
}
# Standardize every column of Y.
Y_std <- apply(X = Y, MARGIN = 2, FUN = standardize)

# Pairwise distances between the observations (rows), using dist() defaults.
distance <- dist(x = as.matrix(Y_std))

# Expand the distance object into a full square matrix and print it.
dist_mat <- as.matrix(distance)
dist_mat

# Export the distance matrix.
write.csv(x = dist_mat, file = "distante.csv")

# You might also like