EXERCISE 1: Basics of plotting graphs and the different types of graphs used in
analytics
The following is an introduction for producing simple graphs with the R Programming Language. Each example builds on the previous one. The
areas in bold indicate new text that was added to the previous example. The graph produced by each example is shown on the right.
Jump to a section:
1. Plots
2. Line Charts
3. Bar Charts
4. Histograms
5. Pie Charts
6. Dotcharts
7. Misc
Plots
Data visualization is a crucial aspect of data analysis, allowing us to gain insights and communicate findings
effectively. In R, the plot() function is a versatile tool for creating a wide range of plots, including scatter plots, line
plots, bar plots, histograms, and more.
Creating a scatter plot using plot() function:
#write syntax and explain parameters in it
# Example data: five paired observations
x <- c(1, 2, 3, 4, 5)
y <- c(2, 3, 5, 4, 6)

# Scatter plot of y against x, drawn with solid blue dots (pch = 19),
# a main title and explicit axis labels
plot(
  x, y,
  pch = 19, col = "blue",
  main = "Scatter Plot",
  xlab = "X-axis", ylab = "Y-axis"
)

# Scatter plot from the built-in mtcars data set: wt on the vertical
# axis against mpg on the horizontal axis
with(
  mtcars,
  plot(mpg, wt,
       pch = 19, col = "steelblue",
       main = "Scatterplot", xlab = "mpg", ylab = "wt")
)
Creating a line plot using the plot() function:
#write syntax and explain parameters in it
# Example data: ten time steps and the value observed at each one
time <- seq_len(10)
values <- c(2, 4, 5, 7, 6, 8, 9, 10, 12, 11)

# type = "l" joins the observations with a line instead of drawing points
plot(
  time, values,
  type = "l", col = "red",
  main = "Line Plot",
  xlab = "Time", ylab = "Values"
)

# Scatter plot of fuel economy against horsepower (mtcars)
with(
  mtcars,
  plot(x = hp, y = mpg,
       main = "Scatter Plot: Horsepower vs. MPG",
       xlab = "Horsepower", ylab = "Miles per Gallon")
)

# Overlay the least-squares line of mpg regressed on hp
abline(reg = lm(mpg ~ hp, data = mtcars), col = "red")
Creating a box plot using the boxplot() function:
#write syntax and explain parameters in it
# Create example data
categories <- c("A", "B", "C", "D")
counts <- c(10, 15, 8, 12)

# Create a horizontal box plot of the counts.
# boxplot() takes `horizontal = TRUE`; `names.arg` and `horiz` are
# barplot() arguments and only trigger "not a graphical parameter"
# warnings here. A single numeric vector produces one box, so the
# per-category labels in `categories` do not apply to this plot.
# With a horizontal box the values run along the x axis, hence xlab.
boxplot(counts,
        main = "Box Plot", xlab = "Counts",
        col = "pink", border = "black",
        horizontal = TRUE)

# Create boxplot of values for hp
boxplot(mtcars$hp,
        main = "Distribution of hp values",
        ylab = "hp", col = "blue", border = "yellow")

# Create one boxplot per column of mtcars. The title and axis labels
# describe all variables rather than hp alone.
boxplot(mtcars,
        main = "Distribution of all mtcars variables",
        xlab = "Variable", ylab = "Value",
        col = "blue", border = "yellow")
Line Charts
#write syntax and explain parameters in it
First we'll produce a very simple graph using the values in the
cars vector:
# Five observations for the cars series
cars <- c(1, 3, 6, 4, 9)

# Plot the series relying entirely on plot()'s defaults
plot(cars)
Let's add a title, a line to connect the points, and some
color:
# Five observations for the cars series
cars <- c(1, 3, 6, 4, 9)

# type = "o" draws the points with a connecting line over them, in blue
plot(cars, col = "blue", type = "o")

# Add the main title in red; font.main = 4 selects bold italic
title(main = "Autos", font.main = 4, col.main = "red")
Now let's add a red line for trucks and specify the y-axis range directly so it will
be large enough to fit the truck data:
# Two series to compare
cars <- c(1, 3, 6, 4, 9)
trucks <- c(2, 5, 4, 5, 12)

# Draw cars first, fixing the y axis at 0-12 so the trucks data fits
plot(cars, col = "blue", type = "o", ylim = c(0, 12))

# Add trucks to the same plot: red dashed line (lty = 2) with square
# points (pch = 22)
lines(trucks, col = "red", type = "o", lty = 2, pch = 22)

# Main title in red; font.main = 4 selects bold italic
title(main = "Autos", font.main = 4, col.main = "red")
Next let's change the axes labels to match our data and add a legend. We'll also
compute the y-axis values using the max function so any changes to
our data will
be automatically reflected in our graph.
# Define 2 vectors
cars <- c(1, 3, 6, 4, 9)
trucks <- c(2, 5, 4, 5, 12)

# Calculate range from 0 to max value of cars and trucks
g_range <- range(0, cars, trucks)

# Graph autos using a y axis that ranges from 0 to the max value in
# the cars or trucks vector. Turn off axes and annotations (axis
# labels) so we can specify them ourselves.
plot(cars, type = "o", col = "blue", ylim = g_range,
     axes = FALSE, ann = FALSE)

# Make x axis using Mon-Fri labels
axis(1, at = 1:5, labels = c("Mon", "Tue", "Wed", "Thu", "Fri"))

# Make y axis with horizontal labels (las = 1) and a tick every 4
# units from 0 up to the top of the range; for this data that is
# c(0, 4, 8, 12).
axis(2, las = 1, at = seq(0, g_range[2], by = 4))

# Create box around plot
box()

# Graph trucks with red dashed line and square points
lines(trucks, type = "o", pch = 22, lty = 2, col = "red")

# Create a title with a red, bold/italic font
title(main = "Autos", col.main = "red", font.main = 4)

# Label the x and y axes with dark green text
title(xlab = "Days", col.lab = rgb(0, 0.5, 0))
title(ylab = "Total", col.lab = rgb(0, 0.5, 0))

# Create a legend at (1, g_range[2]) that is slightly smaller (cex)
# and uses the same line colors and points used by the actual plots
legend(1, g_range[2], c("cars", "trucks"),
       cex = 0.8, col = c("blue", "red"), pch = 21:22,
       lty = 1:2)
# Load the mtcars dataset (if not already loaded)
data(mtcars)

# Create a scatterplot of miles per gallon against horsepower
plot(x = mtcars$hp, y = mtcars$mpg,
     xlab = "Horsepower", ylab = "Miles per Gallon",
     main = "Horsepower vs. MPG")

# Add the line of best fit from a linear regression of mpg on hp
abline(lm(mpg ~ hp, data = mtcars), col = "red")
Bar Charts
#write syntax and explain parameters in it
Let's start with a simple bar chart graphing the values in vector A:
# Define vector A with 5 values
A <- c(17, 32, 8, 53, 1)

# Vertical bar chart of A with red bars.
# The title is kept on one line; splitting the string literal across
# source lines would embed a newline in the title.
barplot(A, xlab = "X-axis", ylab = "Y-axis",
        main = "Bar-Chart", col = "red")

# Horizontal bar chart of the same data (horiz = TRUE)
barplot(A, horiz = TRUE, xlab = "X-axis",
        ylab = "Y-axis", main = "Horizontal Bar Chart")

# Create a frequency table for the 'cyl' column in mtcars
table1 <- table(mtcars$cyl)

# Create the bar plot: one bar per distinct cylinder count
barplot(table1,
        main = "Car Frequency by Number of Cylinders",
        xlab = "Number of Cylinders")
# Data for the chart: article counts (A) and their month labels (B)
A <- c(17, 2, 8, 13, 1, 22)
B <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun")

# Bar chart with enlarged title, axis-label and tick-label text
barplot(
  A,
  names.arg = B,
  col = "steelblue",
  main = "GeeksforGeeks - Article Chart",
  xlab = "Month", ylab = "Articles",
  cex.main = 1.5, cex.lab = 1.2, cex.axis = 1.1
)

# Redraw the chart with 20% head-room on the y axis. barplot() returns
# the x position of each bar's midpoint, which is where the value
# labels are then placed, just above the bars (pos = 3).
text(
  x = barplot(A, names.arg = B, col = "steelblue",
              ylim = c(0, 1.2 * max(A))),
  y = A + 1,
  labels = A,
  pos = 3, cex = 1.2, col = "black"
)
# Fill colours, month labels (one per column) and region names (one
# per row) for the chart
colors <- c("green", "orange", "brown")
months <- c("Mar", "Apr", "May", "Jun", "Jul")
regions <- c("East", "West", "North")

# Revenue matrix: 3 rows by 5 columns, assembled one row at a time
Values <- rbind(
  c(2, 9, 3, 11, 9),
  c(4, 8, 7, 3, 12),
  c(5, 2, 8, 10, 11)
)

# beside = TRUE places the three bars of each month side by side
barplot(
  Values,
  beside = TRUE,
  col = colors,
  names.arg = months,
  main = "Total Revenue",
  xlab = "Month", ylab = "Revenue"
)

# Legend mapping each fill colour to its region
legend("topleft", legend = regions, fill = colors, cex = 0.7)
# Fill colours, month labels (one per column) and region names (one
# per row) for the chart
colors <- c("green", "orange", "brown")
months <- c("Mar", "Apr", "May", "Jun", "Jul")
regions <- c("East", "West", "North")

# Revenue matrix: 3 rows by 5 columns, written out column by column
# (each group of three values below is one month)
Values <- matrix(
  c(2, 4, 5,
    9, 8, 2,
    3, 7, 8,
    11, 3, 10,
    9, 12, 11),
  nrow = 3
)

# Without beside = TRUE, each month's three values are stacked into
# a single bar
barplot(
  Values,
  col = colors,
  names.arg = months,
  main = "Total Revenue",
  xlab = "Month", ylab = "Revenue"
)

# Legend mapping each fill colour to its region
legend("topleft", legend = regions, fill = colors, cex = 0.7)
Histograms
#write syntax and explain parameters in it
Let's start with a simple histogram graphing the distribution of the suvs vector:
# Five observations for the suvs series
suvs <- c(rep(c(4, 6), each = 2), 16)

# Histogram of suvs using hist()'s default bins, title and labels
hist(suvs)
# Data for the graph
v <- c(19, 23, 11, 5, 16, 21, 32, 14, 19, 27, 39)

# Green bars with black outlines and a custom x-axis label
hist(v, col = "green", border = "black", xlab = "No.of Articles ")

# Same data again
v <- c(19, 23, 11, 5, 16, 21, 32, 14, 19, 27, 39)

# Fix both axis ranges and request about five bins
hist(
  v,
  breaks = 5,
  xlim = c(0, 50), ylim = c(0, 5),
  col = "green", border = "black",
  xlab = "No.of Articles"
)

# Same data again
v <- c(19, 23, 11, 5, 16, 21, 32, 14, 19, 27, 39)

# Keep the object hist() returns so the bin midpoints and counts can
# be used to label the bars
m <- hist(
  v,
  breaks = 5,
  col = "darkmagenta", border = "pink",
  xlab = "Weight", ylab = "Frequency"
)

# Write each bin's count at (midpoint, count); adj nudges the text
# so it sits just above the top of the bar
with(m, text(mids, counts, labels = counts, adj = c(0.5, -0.5)))
# Creating data for the graph.
v <- c(19, 23, 11, 5, 16, 21, 32, 14,
       19, 27, 39, 120, 40, 70, 90)

# Creating the histogram with hand-picked, unequal-width bins and the
# x axis restricted to 50-100.
# When the breaks are not equally spaced, hist() plots densities
# rather than counts by default, so the y axis is labelled "Density".
hist(v, xlab = "Weight", ylab = "Density",
     xlim = c(50, 100),
     col = "darkmagenta", border = "pink",
     breaks = c(5, 55, 60, 70, 75,
                80, 100, 140))
Basic Histogram: Shows the distribution of a single variable.
# Make the built-in mtcars data set available
data(mtcars)

# Histogram of the mpg column: blue bars outlined in black
hist(
  mtcars$mpg,
  col = "blue", border = "black",
  xlab = "Miles Per Gallon (mpg)",
  main = "Histogram of Miles Per Gallon (mpg)"
)
Histogram with Density Plot:
Combines a histogram with a density plot to show the distribution
and smooth curve of the data.
# Histogram of mpg on the density scale (probability = TRUE) so that
# the kernel density curve added below shares the same y axis.
# The title is kept on one line; splitting the string literal across
# source lines would embed a newline in the title.
hist(mtcars$mpg, probability = TRUE,
     main = "Histogram of Miles Per Gallon with Density",
     xlab = "Miles Per Gallon (mpg)",
     col = "lightgreen", border = "black")

# Overlay the estimated density as a thick blue line
lines(density(mtcars$mpg), col = "blue", lwd = 2)
Grouped Histogram: Displays histograms of multiple variables on
the same plot for comparison.
# Overlay semi-transparent histograms of mpg and hp on one plot.
# hp values all lie above 35, so the x axis must span both variables;
# with xlim = c(10, 35) the hp bars would fall outside the plot and
# never be drawn. The title and x label name both variables because
# the axis now carries both.
hist(mtcars$mpg, col = rgb(0.2, 0.5, 0.5, 0.5),
     xlim = range(mtcars$mpg, mtcars$hp),
     main = "Grouped Histogram of MPG and HP",
     xlab = "Value")

# add = TRUE draws onto the existing plot instead of starting a new one
hist(mtcars$hp, col = rgb(0.5, 0.2, 0.5, 0.5), add = TRUE)

# Legend mapping each fill colour to its variable
legend("topright", legend = c("MPG", "HP"),
       fill = c(rgb(0.2, 0.5, 0.5, 0.5), rgb(0.5, 0.2, 0.5, 0.5)))
Stacked Histogram: Shows multiple histograms stacked on top
of each other to illustrate the distribution of different
categories.
# Overlay semi-transparent histograms of mpg and hp on one plot.
# hp values all lie above 35, so the x axis must span both variables;
# with xlim = c(10, 35) the hp bars would fall outside the plot and
# never be drawn.
hist(mtcars$mpg, col = rgb(0.2, 0.5, 0.5, 0.5),
     xlim = range(mtcars$mpg, mtcars$hp),
     main = "Stacked Histogram of MPG and HP", xlab = "Value")

# add = TRUE draws onto the existing plot instead of starting a new one
hist(mtcars$hp, col = rgb(0.5, 0.2, 0.5, 0.5), add = TRUE)

# Legend mapping each fill colour to its variable
legend("topright", legend = c("MPG", "HP"),
       fill = c(rgb(0.2, 0.5, 0.5, 0.5), rgb(0.5, 0.2, 0.5, 0.5)))
Pie Charts
#write syntax and explain parameters in it
Let's start with a simple pie chart graphing the cars vector:
# Five observations for the cars series
cars <- c(1, 3, 6, 4, 9)

# Pie chart of cars using pie()'s default colours and labels
pie(cars)
Now let's add a heading, change the colors, and define our own labels:
# Five observations for the cars series
cars <- c(1, 3, 6, 4, 9)

# Pie chart with a heading, one rainbow colour per slice and
# weekday names as the slice labels
pie(
  cars,
  labels = c("Mon", "Tue", "Wed", "Thu", "Fri"),
  col = rainbow(length(cars)),
  main = "Cars"
)
Now let's change the colors, label using percentages, and create a legend:
# Define cars vector with 5 values
cars <- c(1, 3, 6, 4, 9)

# Define some colors ideal for black & white print
colors <- c("white", "grey70", "grey90", "grey50", "black")

# Calculate the percentage for each day, rounded to one decimal place
car_labels <- round(cars / sum(cars) * 100, 1)

# Concatenate a '%' char after each value
car_labels <- paste0(car_labels, "%")

# Create a pie chart with defined heading and custom colors and labels
pie(cars, main = "Cars", col = colors,
    labels = car_labels, cex = 0.8)

# Create a legend at the right
legend(1.5, 0.5, c("Mon", "Tue", "Wed", "Thu", "Fri"),
       cex = 0.8, fill = colors)

# Load the mtcars dataset
data(mtcars)

# Plot a pie chart of the 'cyl' variable: one slice per distinct
# cylinder count, sized by how many cars have it.
# The title is kept on one line; splitting the string literal across
# source lines would embed a newline in the title.
cyl_table <- table(mtcars$cyl)
pie(cyl_table,
    main = "Distribution of Cars by Cylinders",
    col = rainbow(length(cyl_table)))
Dotcharts
A dot plot or dot chart is similar to a scatter plot.
The main difference is that the dot plot in R displays the index (each category) on the vertical axis and the
corresponding value on the horizontal axis, so you can read the value of each observation by following a horizontal line
from its label.
There are several types of dot charts, such as the classical dot chart, Cleveland's version and the dumbbell dot plot.
The examples below show how to create Cleveland dot plots and dumbbell charts in R.
#write syntax and explain parameters in it
#Cleveland dot plots
# Build the example data set: one row per calendar month with the
# expected and actual (sold) figures and the quarter it belongs to
set.seed(1)
month <- month.name
expected <- c(15, 16, 20, 31, 11, 6, 17, 22, 32, 12, 19, 20)
sold <- c(8, 18, 12, 10, 41, 2, 19, 26, 14, 16, 9, 13)
quarter <- rep(c(1, 2, 3, 4), each = 3)
data <- data.frame(
  month = month,
  expected = expected,
  sold = sold,
  quarter = quarter
)
print(data)

# Dot chart of the sold column: one labelled row per month, drawn
# with green-filled circles (pch = 21 with bg) at 1.5x size
dotchart(
  data$sold,
  labels = data$month,
  pch = 21, bg = "green",
  pt.cex = 1.5
)
#find out the difference between bg & col functions
#plot dot chart on mtcars data sets
# Groups: pick one colour per quarter (1 = red, 2 = blue, 3 = green,
# 4 = orange), giving one colour entry per element of `quarter`
colors <- c("red", "blue", "green", "orange")[quarter]

# Dot chart of the expected column, split into groups and coloured
# with the per-element colours built above
dotchart(
  data$expected,
  labels = data$month,
  groups = rev(data$quarter),
  color = colors,
  pch = 19, pt.cex = 1.5
)

# Ordered dot chart: sort the rows by the expected column first
x <- data[order(data$expected), ]

# NOTE(review): `colors` and `groups` below are still in the original
# row order while `x` is sorted, so they may not line up with the
# sorted rows. Confirm this is intended.
dotchart(
  x$expected,
  labels = x$month,
  groups = rev(data$quarter),
  color = colors,
  pch = 19, pt.cex = 1.5,
  xlim = range(x$expected, x$sold) + c(-2, 2)
)
# Dumbbell plots: show sold and expected for each month on one row.

# Row positions 1..n; seq_len() stays correct even for an empty
# data frame, where 1:nrow(data) would yield c(1, 0).
rows <- seq_len(nrow(data))

# Shared x-axis range covering both series, padded by 2 on each side
x_range <- range(data$expected, data$sold) + c(-2, 2)

# --- Basic dumbbell: sold as green-filled circles, expected as red dots
dotchart(data$sold, pch = 21, labels = data$month, bg = "green",
         pt.cex = 1.5, xlim = x_range)
points(data$expected, rows, col = "red", pch = 19, cex = 1.5)

# --- Dumbbell with connecting segments and value labels
dotchart(data$sold, labels = data$month, pch = 21, bg = "green",
         xlim = x_range, pt.cex = 1.5)

# Lower and upper value of each row. segments() and text() are
# vectorised, so every row is drawn in one call rather than looping.
lower <- pmin(data$sold, data$expected)
upper <- pmax(data$sold, data$expected)

# segments(x0, y0, x1, y1): a horizontal bar joining the two values
segments(lower, rows, upper, rows, lwd = 2)

# text(x, y, labels): print each value just outside its end of the bar
text(lower - 1.5, rows, labels = lower)
text(upper + 1.5, rows, labels = upper)

# Draw the points last so they sit on top of the segments
points(data$expected, rows, col = "red", pch = 19, cex = 1.5)
points(data$sold, rows, col = "red", pch = 21,
       bg = "green", cex = 1.5)
Misc
This example shows all 26 symbols (pch values 0 through 25) that you can use to produce points in your
graphs:
# Start from an empty canvas: type = "n" sets up the plot region
# without drawing any points, and ann = FALSE omits titles and labels
plot(1, 1, type = "n", ann = FALSE, xlim = c(1, 5.5), ylim = c(0, 7))

# Row y = 6: the digits 0-4, increasing in size and changing colour
text(x = 1:5, y = rep(6, 5), labels = 0:4, cex = 1:5, col = 1:5)

# Row y = 5: symbols 0-4 with the same sizes and colours, each
# followed by a small label giving its pch number
points(x = 1:5, y = rep(5, 5), pch = 0:4, cex = 1:5, col = 1:5)
text(x = (1:5) + 0.4, y = rep(5, 5), labels = 0:4, cex = 0.6)

# Rows y = 4, 3, 2: symbols 5-9, 10-14 and 15-19 at a fixed size,
# each followed by its pch number
for (row_y in 4:2) {
  pch_codes <- 5 * (5 - row_y) + 0:4
  points(1:5, rep(row_y, 5), pch = pch_codes, cex = 2)
  text((1:5) + 0.4, rep(row_y, 5), labels = pch_codes, cex = 0.6)
}

# Row y = 1: the six symbols 20-25, spaced more tightly so they fit
points((1:6) * 0.8 + 0.2, rep(1, 6), pch = 20:25, cex = 2)
text((1:6) * 0.8 + 0.5, rep(1, 6), labels = 20:25, cex = 0.6)