You are on page 1 of 7

Data Frames

➢ Data frames are generic data objects in R that are used to store tabular data.

➢ A data frame is like a matrix, with a two-dimensional rows-and-columns structure.

➢ However, it differs from a matrix in that each column of a data frame can be of a
different data type (all values within a single column still share one type).

➢ In other words, data frames are the heterogeneous analogs of matrices for two-dimensional data.

# Create a data frame : Eg1
# Three columns of equal length: one character column and two numeric columns.
Data_Frame <- data.frame(
  Name = c("Jack", "Jill", "Robert"),
  Score = c(100, 150, 120),
  Age = c(60, 30, 45)
)

# Print the data frame
print(Data_Frame)

# Create the data frame : Eg2
# Columns of four different types: integer ids, character names, numeric
# salaries and Date start dates.
emp.data <- data.frame(
  emp_id = 1:5,
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(c(
    "2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27"
  )),
  # Keep emp_name as character; this matters on R < 4.0, where the default
  # was to convert strings to factors.
  stringsAsFactors = FALSE
)

# Print the data frame.
print(emp.data)
Summary of data in the data frame

For an R data frame, a statistical summary and the nature of the data in each column
can be obtained by applying the summary() function.

Extract Data from Data Frame

Extract specific column from a data frame using column name.

# Build the example data frame and print a per-column summary.
Data_Frame <- data.frame(
  Name = c("Jack", "Jill", "Robert"),
  Score = c(100, 150, 120),
  Age = c(60, 30, 45)
)

# summary() reports quartiles and the mean for the numeric columns, and
# length/class/mode for the character column.
print(summary(Data_Frame))

# Build the employee data frame and print a per-column summary.
emp.data <- data.frame(
  emp_id = 1:5,
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(c(
    "2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27"
  )),
  # Keep emp_name as character; this matters on R < 4.0, where the default
  # was to convert strings to factors.
  stringsAsFactors = FALSE
)

print(summary(emp.data))
# Extract specific columns by name.
# Indexing with `[` keeps the original column names (emp_name, salary);
# rebuilding with data.frame(emp.data$emp_name, emp.data$salary) would rename
# them to emp.data.emp_name and emp.data.salary.
result <- emp.data[, c("emp_name", "salary")]
print(result)

# Extract first two rows and all columns.
result <- emp.data[1:2, ]
print(result)

# Extract 3rd and 4th row with 2nd and 3rd column.
result <- emp.data[3:4, 2:3]
print(result)

# Extract 3rd and 5th row with 2nd and 4th column.
result <- emp.data[c(3, 5), c(2, 4)]
print(result)

Add Rows

Use the rbind() function to add new rows in a Data Frame

Add Columns

Use the cbind() function to add new columns in a Data Frame

Remove Rows and Columns

Use negative indices (built with the c() function) to remove rows and columns from a Data Frame

# Build the example data frame that the add/remove examples below reuse.
Data_Frame <- data.frame(
  Name = c("Jack", "Jill", "Robert"),
  Score = c(100, 150, 120),
  Age = c(60, 30, 45)
)

print(Data_Frame)

# Add a new row.
# The row is supplied as a one-row data frame rather than c("Steve", 90, 25):
# c() coerces all three values to character, and rbind() would then turn the
# numeric Score and Age columns into character columns as well.
New_row_DF <- rbind(
  Data_Frame,
  data.frame(Name = "Steve", Score = 90, Age = 25)
)

# Print the data frame with the new row
print(New_row_DF)

# Add a new column (one value per existing row)
New_col_DF <- cbind(Data_Frame, salary = c(1000, 6000, 2000))

# Print the data frame with the new column
print(New_col_DF)

# Remove the first row and the first column (negative indices drop entries)
Data_Frame_New <- Data_Frame[-1, -1]

# Print the new data frame
print(Data_Frame_New)

# dim() returns the number of rows and columns in a data frame
print(dim(Data_Frame))

# ncol() returns the number of columns and nrow() the number of rows
print(ncol(Data_Frame))
print(nrow(Data_Frame))

Combining Data Frames

Use the rbind() function to combine two or more data frames in R vertically

And use the cbind() function to combine two or more data frames in R horizontally

# Two data frames with identical column names and types.
Data_Frame1 <- data.frame(
  Name = c("Jack", "Jill", "Robert"),
  Score = c(100, 150, 120),
  Age = c(60, 30, 45)
)

Data_Frame2 <- data.frame(
  Name = c("Steve", "Alice", "Bob"),
  Score = c(95, 150, 120),
  Age = c(40, 35, 55)
)

# Combine vertically: the rows of Data_Frame2 are appended below Data_Frame1.
New_Data_Frame <- rbind(Data_Frame1, Data_Frame2)

print(New_Data_Frame)

# Combine using cbind()
# Two data frames with the same number of rows but different columns.
Data_Frame3 <- data.frame(
  Name = c("Jack", "Jill", "Robert"),
  Score = c(100, 150, 120),
  Age = c(60, 30, 45)
)

Data_Frame4 <- data.frame(
  Salary = c(3000, 6000, 2000),
  BloodGroup = c("B+", "A+", "O+")
)

# Combine horizontally: the columns of Data_Frame4 are placed to the right of
# the columns of Data_Frame3.
New_Data_Frame1 <- cbind(Data_Frame3, Data_Frame4)

print(New_Data_Frame1)

# Adding a column without cbind()
Data_Frame3 <- data.frame(
  Name = c("Jack", "Jill", "Robert"),
  Score = c(100, 150, 120),
  Age = c(60, 30, 45)
)

# Assigning to a column name that does not exist yet appends it as a new
# column; the vector must supply one value per row.
Data_Frame3$Salary <- c(4000, 5000, 6000)

New_Data_Frame <- Data_Frame3

print(New_Data_Frame)

You might also like