You are on page 1 of 4

Code Simulation - Data Preparation, Plot

# Topic 1: Getting Data ----

# Method 1: read the dataset from an online link
# link <- "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
# iris <- read.csv(url(link), header = FALSE)
# iris

# Method 2: use the built-in dataset that ships with R
iris
# iris3

# Method 3: download the dataset and read it from a local file
# file <- "C:/Users/Doc Mike/Downloads/iris.data"
# myirisdata <- read.csv(file, header = FALSE)
# myirisdata

# Topic 2: Getting the dimension of your data frame ----

dim(iris)

# Rename the columns on a copy. Assigning to colnames(iris) directly would
# create a workspace copy that masks the built-in dataset, and any later code
# that uses the original names (e.g. iris$Sepal.Width) would silently get NULL.
iris_short <- iris
colnames(iris_short) <- c("SLen", "SWidth", "PLen", "Pwidth", "Type")

# View() opens the spreadsheet-style viewer, so only call it in an
# interactive session.
if (interactive()) {
  View(iris_short)
}

# Topic 2a: Checking Missing Values ----

is.na(iris_short)                                   # TRUE/FALSE for every cell
df <- data.frame(c(1, 2, 3, 4), c(44, 55, NA, NA))  # small example with 2 NAs
print(df)
is.na(df)
any(is.na(df))          # is there at least one missing value?
any(is.na(iris_short))
sum(is.na(df))          # how many values are missing?
sum(is.na(iris_short))

# Topic 3: Visualization using Plot ----

x <- c(1, 2, 3, 4)
y <- c(1, 2, 3, 4)
plot(x, y)

plot(iris_short)                   # scatter-plot matrix of all columns
plot(iris_short$SLen)              # values against their record index
plot(iris_short$SLen, type = "l")  # "l" = lines
plot(iris_short$SLen, type = "s", main = "Iris Sepal Length")  # "s" = steps

# With a single vector the x axis is the record index and the y axis is the
# value, so the record label belongs on x and the length label on y.
plot(iris_short$SLen, type = "s", main = "Iris Sepal Length",
     xlab = "Rec Number", ylab = "Length")

Code Simulation: Data Visualization

# Chart types shown below:
# boxplot
# barplot
# histogram

# Load a fresh copy of the built-in dataset into the workspace. The session
# may already hold an `iris` object with renamed columns, in which case
# iris$Sepal.Width would be NULL and every plot below would fail.
data(iris)

plot(iris$Sepal.Width)

# horiz = TRUE draws the bars sideways, so the x axis carries the sepal-width
# values and the y axis has one bar per observation.
barplot(iris$Sepal.Width,
        horiz = TRUE,
        xlab = "Sepal Width",
        ylab = "Observation",
        col = "green",
        main = "IRIS Sepal Width Dataset")

boxplot(iris$Sepal.Width, col = "red")

hist(iris$Sepal.Width,
     xlab = "Sepal Width",
     ylab = "Frequency",
     col = "yellow",
     main = "Histogram of Iris Dataset Sepal Width")

# View() opens the spreadsheet-style viewer, so only call it in an
# interactive session.
if (interactive()) {
  View(iris)
}

# One slice per observation.
pie(iris$Sepal.Width)

Code Simulation: Bar Plot for Categorical Variables


# Topic: Barplot for Categorical Data ----

# HELP ON PLOTS
?plot

# LOAD DATASETS PACKAGE
# library() stops with an error if the package is missing; require() would
# only return FALSE and let the script fail later.
library(datasets)

# ONE ROW PER CASE
?chickwts       # Documentation of the chickwts dataset
chickwts        # Look at data
data(chickwts)  # Load data into workspace

# Quickest Method
plot(chickwts$feed)  # Plot feed from chickwts

# "barplot" offers more control but must prepare data:
# R doesn't create bar charts directly from the categorical
# variables; instead, we must first create a table that
# has the frequencies for each level of the variable.
feeds <- table(chickwts$feed)
feeds
barplot(feeds)  # Identical to plot(chickwts$feed)
?barplot

# To put the bars in descending order, add "order":
barplot(feeds[order(feeds, decreasing = TRUE)])

# Customize the chart
# par() returns the previous values of the settings it changes; keep them so
# the margins can be put back after this chart.
old_par <- par(
  oma = c(1, 1, 1, 1),  # Sets outside margins: b, l, t, r
  mar = c(4, 5, 2, 1)   # Sets plot margins
)
barplot(feeds[order(feeds)],
        horiz = TRUE,
        las = 1,      # las gives orientation of axis labels
        col = c("beige", "blanchedalmond", "bisque1", "bisque2", "bisque3", "bisque4"),
        border = NA,  # No borders on bars
        main = "Frequencies of Different Feeds\nin chickwts Dataset",  # \n = line break
        xlab = "Number of Chicks")
par(old_par)  # Restore the previous margins
?par          # Help on par

# Clean up: remove only the objects this section created, instead of wiping
# the whole global environment with rm(list = ls()).
rm(feeds, old_par, chickwts)

Code Simulation: Histogram for Quantitative Variables


# LOAD DATASETS PACKAGE
# library() stops with an error if the package is missing; require() would
# only return FALSE and let the script fail later.
library(datasets)
?lynx
data(lynx)  # Annual Canadian Lynx trappings 1821-1934

# Make a histogram using the defaults
hist(lynx)
?hist

# Modify histogram
h <- hist(lynx,         # Save histogram as object
          breaks = 11,  # "Suggests" 11 bins
          # breaks = seq(0, 7000, by = 100),
          # breaks = c(0, 100, 300, 500, 3000, 3500, 7000),
          freq = FALSE,      # Plot densities rather than counts
          col = "thistle1",  # Or use: col = colors()[626]
          main = "Histogram of Annual Canadian Lynx Trappings\n1821-1934",
          xlab = "Number of Lynx Trapped")

# IF freq = FALSE, this will draw normal distribution
# (a normal density with the sample mean and standard deviation of lynx,
# added on top of the existing histogram)
curve(dnorm(x, mean = mean(lynx), sd = sd(lynx)),
      col = "thistle4",
      lwd = 2,
      add = TRUE)
?curve

# Clean up: remove only the objects this section created, instead of wiping
# the whole global environment with rm(list = ls()).
rm(h, lynx)

Working with color in R


# Sample bar heights used for every colour demonstration below.
x <- c(12, 34, 25, 15, 7, 10)

barplot(x)                  # Default colour
barplot(x, col = "yellow")  # Colour by name

colors()                    # List all built-in colour names
barplot(x, col = "navajowhite4")
barplot(x, col = colors()[646])  # Colour by its position in colors()

# Colour by RGB triplet. The argument is spelled out as maxColorValue rather
# than relying on partial matching of `max`.
barplot(x, col = rgb(255, 0, 0, maxColorValue = 255))
barplot(x, col = rgb(200, 0, 255, maxColorValue = 255))

# Several colours at once: three colours for six bars.
barplot(x, col = c("yellow", "wheat", "blue"))

PLOT
Generic X-Y Plotting
Generic function for plotting of R objects. For more details about the graphical parameter arguments, see par.
For simple scatter plots, plot.default will be used. However, there are plot methods for many R objects,
including functions, data.frames, density objects, etc. Use methods(plot) and the documentation for these.
Keywords hplot
Usage plot(x, y, …)
Arguments
x : the coordinates of points in the plot. Alternatively, a single plotting structure, function or any R object with
a plot method can be provided.
y : the y coordinates of points in the plot, optional if x is an appropriate structure.
… : Arguments to be passed to methods, such as graphical parameters (see par). Many methods will accept the
following arguments:

type : what type of plot should be drawn. Possible types are:


"p" for points,
"l" for lines,
"b" for both,
"c" for the lines part alone of "b",
"o" for both ‘overplotted’,
"h" for ‘histogram’ like (or ‘high-density’) vertical lines,
"s" for stair steps,
"S" for other steps, see ‘Details’ below,
"n" for no plotting.
All other types give a warning or an error; using, e.g., type = "punkte" being equivalent to type = "p" for S
compatibility. Note that some methods, e.g. plot.factor, do not accept this.
main : an overall title for the plot: see title.
sub : a sub title for the plot: see title.
xlab : a title for the x axis: see title.
ylab : a title for the y axis: see title.
asp : the y/x aspect ratio, see plot.window.

You might also like