You are on page 1 of 2

#options(scipen = 20)

#install.packages("RMySQL")
library(RMySQL)
library(pastecs)

# ---- Numeric variable summary statistics ----
# Splits the columns of `table` into text-like columns (factor or character)
# and everything else, then summarises the non-text columns with
# pastecs::stat.desc() plus the 25% / 50% / 75% / 100% quantiles.
#
# Reads:   `table` - a data frame defined outside this section.
# Creates: `char_columns` - integer positions of the text-like columns,
#          `summary_num`  - one row per non-text column (an empty data frame
#                           when `table` has no such column).

# Not referenced again in the visible part of the script; kept in case later
# code relies on them.
summary_num_col <- data.frame()
summary_char_col <- data.frame()

# Defined up front so the CSV export further down never fails with
# "object 'summary_num' not found" when there is nothing numeric to summarise.
summary_num <- data.frame()

#for (i in 1:length(tables)) {

# is.factor() also matches ordered factors (whose class() has length 2), and
# is.character() catches string columns that were never converted to factors.
# seq_len() keeps this safe for a zero-column `table`.
char_columns <- which(vapply(
  seq_len(ncol(table)),
  function(j) {
    column <- table[[j]]
    is.factor(column) || is.character(column)
  },
  logical(1)
))
print(char_columns)

# Select by positive index: `table[, -char_columns]` with an empty
# `char_columns` would select *no* columns instead of all of them.
num_columns <- setdiff(seq_len(ncol(table)), char_columns)

if (length(num_columns) != 0) {
  # drop = FALSE keeps a data frame (and the column name) even when only one
  # column is left.
  table_num <- as.data.frame(table[, num_columns, drop = FALSE])

  # stat.desc() returns one column per variable; transpose so that each
  # variable becomes a row.
  table_summary_num <- as.data.frame(t(stat.desc(table_num)))

  # Quantiles are computed per column with na.rm = TRUE. Dropping incomplete
  # rows from the whole frame first would let an NA in one column discard
  # valid observations of every other column.
  table_quantiles_num <- as.data.frame(do.call(
    rbind,
    lapply(
      table_num,
      quantile,
      probs = c(0.25, 0.5, 0.75, 1),
      na.rm = TRUE,
      names = FALSE
    )
  ))
  # First label was misspelled "Qo,25"; aligned with the other three.
  names(table_quantiles_num) <- c("Q0.25", "Q0.5", "Q0.75", "Q1")

  summary_num_full <- cbind(table_summary_num, table_quantiles_num)

  # Columns are dropped by position. With stat.desc() defaults these should
  # be sum, SE.mean, CI.mean and coef.var - TODO confirm against the
  # installed pastecs version.
  summary_num <- summary_num_full[, -c(7, 10, 11, 14)]

  rm(table_summary_num, table_quantiles_num, table_num)
}
### Text Variable Summary Statistics ###
# Builds `summary_char`: one row per column listed in `char_columns`, with
#   Attribute    - column name
#   Class        - class of the column (several classes are joined with "/")
#   Count        - number of values, NA included
#   Missing      - number of NA values
#   Prop_Missing - Missing / Count
#   Blanks       - number of values equal to the empty string ""
#   UniqueValue  - number of distinct values (NA counts as a value)
#
# Reads `table` and `char_columns`, both defined earlier in the script.
# `summary_char` is an empty data frame when `char_columns` is empty.
char_col_names <- names(table)[char_columns]

# seq_along() yields no iterations for an empty `char_columns`, whereas
# `1:length(...)` would run over c(1, 0) and index with NA.
summary_rows <- lapply(seq_along(char_columns), function(z) {
  x <- table[[char_columns[z]]]
  n_missing <- sum(is.na(x))
  # data.frame() rather than cbind() keeps the counts numeric instead of
  # coercing every field to character.
  data.frame(
    Attribute = char_col_names[z],
    Class = paste(class(x), collapse = "/"),
    Count = length(x),
    Missing = n_missing,
    Prop_Missing = n_missing / length(x),
    # Vectorised count; na.rm = TRUE because `NA == ""` is NA, which a scalar
    # `if (value == "")` test cannot handle.
    Blanks = sum(x == "", na.rm = TRUE),
    UniqueValue = length(unique(x)),
    stringsAsFactors = FALSE
  )
})

# Bind once at the end instead of growing the data frame inside the loop.
summary_char <- if (length(summary_rows) > 0) {
  do.call(rbind, summary_rows)
} else {
  data.frame()
}
rm(summary_rows)
#rm(x,Missing,cmat,Count,Class,Attribute)
##}
# Export both summary tables as CSV files. The paths are relative, so the
# files are written to the current working directory.
write.csv(x = summary_num, file = "summary_num_col.csv")
write.csv(x = summary_char, file = "summary_char_col.csv")
#####################END

You might also like