You are on page 1of 15

D.

A LAB ASSIGNMENT-07
{NAME:RUDRASISH MISHRA} | {SECTION:IT-8} | {ROLL NO:1906649}

Q1. You’ve been given a list of twenty blood types for emergency
surgery patients:
A, O, A, B, B, AB, B, B, O, A, O, O, O, AB, B, AB, AB, A, O, A.
WAP to draw a frequency distribution table consisting of distinct
items, frequency, proportion, percent and cumulative frequency.
SOLUTION:
INPUT:
# Q1: frequency distribution table for the blood types of twenty patients.
bloodgroup_649 <- c(
  "A", "O", "A", "B", "B", "AB", "B", "B", "O", "A",
  "O", "O", "O", "AB", "B", "AB", "AB", "A", "O", "A"
)

# table() counts each distinct blood type; prop.table() turns the counts
# into proportions, and cumsum() accumulates the counts in table order.
freqtable_649 <- table(bloodgroup_649)
proportion_649 <- prop.table(freqtable_649)
percentage_649 <- proportion_649 * 100
cumulativefreqtable_649 <- cumsum(freqtable_649)

# Build the table from plain vectors so every column is named exactly once.
# The percentage column must use percentage_649 (proportion * 100); passing
# proportion_649 twice would show proportions under the "percentage(%)" label.
# check.names = FALSE keeps the "percentage(%)" header as written.
bgframe_649 <- data.frame(
  blood_type = names(freqtable_649),
  frequency = as.vector(freqtable_649),
  proportion = as.vector(proportion_649),
  `percentage(%)` = as.vector(percentage_649),
  cumulative_frequency = as.vector(cumulativefreqtable_649),
  check.names = FALSE
)

cat("Frequency Distribution Table:-\n")
print(bgframe_649)
OUTPUT:
Q2. WAP to calculate the coefficient of skewness based on mean
and median for the following distribution:

SOLUTION:
INPUT:
# Q2: Karl Pearson's coefficient of skewness, 3 * (mean - median) / sd, for a
# grouped frequency distribution.
distribution_649 <- data.frame(
  Class_Interval = c("0-10", "10-20", "20-30", "30-40", "40-50",
                     "50-60", "60-70", "70-80"),
  Frequency = c(6, 12, 22, 48, 56, 32, 18, 6)
)
print(distribution_649)

# The statistics must describe the grouped variable, not the list of
# frequency counts, so recover the class limits from the "lower-upper" labels
# and work with class midpoints weighted by frequency.
classlimits_649 <- do.call(
  rbind,
  strsplit(as.character(distribution_649$Class_Interval), "-", fixed = TRUE)
)
lower_649 <- as.numeric(classlimits_649[, 1])
upper_649 <- as.numeric(classlimits_649[, 2])
midpoint_649 <- (lower_649 + upper_649) / 2
classfreq_649 <- distribution_649$Frequency
total_649 <- sum(classfreq_649)

# Grouped mean: frequency-weighted average of the class midpoints.
mean_649 <- sum(classfreq_649 * midpoint_649) / total_649
print(mean_649)

# Grouped median: linear interpolation inside the first class whose
# cumulative frequency reaches N / 2.
cumfreq_649 <- cumsum(classfreq_649)
medianclass_649 <- which(cumfreq_649 >= total_649 / 2)[1]
cumbefore_649 <- if (medianclass_649 > 1) cumfreq_649[medianclass_649 - 1] else 0
median_649 <- lower_649[medianclass_649] +
  (total_649 / 2 - cumbefore_649) / classfreq_649[medianclass_649] *
    (upper_649[medianclass_649] - lower_649[medianclass_649])
print(median_649)

# Grouped (population) standard deviation about the grouped mean.
stddeviation_649 <- sqrt(
  sum(classfreq_649 * (midpoint_649 - mean_649)^2) / total_649
)
print(stddeviation_649)

skewness_649 <- 3 * (mean_649 - median_649) / stddeviation_649
print(skewness_649)

# Choose the comment from the computed value instead of hard-coding it.
skewcomment_649 <- if (abs(skewness_649) < 0.5) {
  "The data is approximately symmetric as skewness is between -0.5 and 0.5"
} else if (abs(skewness_649) <= 1) {
  paste(
    "The data is moderately skewed as skewness is between -1 and -0.5",
    "or between 0.5 and 1"
  )
} else {
  "The data is highly skewed as skewness is below -1 or above 1"
}
print(skewcomment_649)
OUTPUT:
Q3. WAP to comment on the nature of distribution:
❖ 14, 14, 14, 14, 14
❖ 11, 12, 14, 16, 17
❖ 1, 3, 6, 8, 42
SOLUTION:
INPUT:
# Q3: comment on the nature of three small data sets.
natdist_649 <- data.frame(
  x = c(14, 14, 14, 14, 14),
  y = c(11, 12, 14, 16, 17),
  z = c(1, 3, 6, 8, 42)
)
print(natdist_649)
print("statistical summary of data:-")
print(summary(natdist_649))
print(class(natdist_649))
print(paste("x is", class(natdist_649$x)))
print(paste("y is", class(natdist_649$y)))
print(paste("z is", class(natdist_649$z)))

# Classify one numeric vector by Pearson's median skewness,
# 3 * (mean - median) / sd. A zero standard deviation is reported separately
# because the coefficient is undefined when every value is the same.
describe_nature_649 <- function(v) {
  spread <- sd(v)
  if (isTRUE(all.equal(spread, 0))) {
    return("no variation: every value is identical")
  }
  skew <- 3 * (mean(v) - median(v)) / spread
  shape <- if (abs(skew) < 0.5) {
    "approximately symmetric"
  } else if (skew > 0) {
    "positively skewed"
  } else {
    "negatively skewed"
  }
  paste0(shape, " (Pearson skewness = ", round(skew, 3), ")")
}

# One comment per column; vapply() guarantees a character result.
nature_649 <- vapply(natdist_649, describe_nature_649, character(1))
print(nature_649)
OUTPUT:
Q4. The following facts were gathered from a firm before and
after an industrial dispute. By making use of this data,
compare the position of the firm before and after the dispute as
fully as possible.

SOLUTION:
INPUT:
# Q4: compare the firm's wage statistics before and after the dispute.
# Each data frame holds a single row of summary figures for the firm.
Before_Dispute_649 <- data.frame(
  Mean_wages = 850,
  Median_wages = 820,
  Number_employed = 600,
  Standard_distribution = 30,
  First_quartile = 750,
  Third_quartile = 920,
  Modal_wages = 760
)
After_Dispute_649 <- data.frame(
  Mean_wages = 900,
  Median_wages = 800,
  Number_employed = 550,
  Standard_distribution = 110,
  First_quartile = 750,
  Third_quartile = 950,
  Modal_wages = 600
)

# Pearson's mode-based skewness: (mean - mode) / standard deviation.
pearson_skew_649 <- function(firm) {
  (firm$Mean_wages - firm$Modal_wages) / firm$Standard_distribution
}

# Bowley's quartile skewness: (Q3 - 2 * median + Q1) / (Q3 - Q1).
bowley_skew_649 <- function(firm) {
  (firm$Third_quartile - 2 * firm$Median_wages + firm$First_quartile) /
    (firm$Third_quartile - firm$First_quartile)
}

# Employment and total wage bill (number employed * mean wage).
print(paste(
  "Number of employees decreased =",
  Before_Dispute_649$Number_employed - After_Dispute_649$Number_employed
))
twagebd_649 <- Before_Dispute_649$Number_employed *
  Before_Dispute_649$Mean_wages
print(paste("Wages before Dispute = Rs.", twagebd_649))
twagead_649 <- After_Dispute_649$Number_employed *
  After_Dispute_649$Mean_wages
print(paste("Wages after Dispute = Rs.", twagead_649))
print(paste("Total decreased wages = Rs.", twagebd_649 - twagead_649))

# Changes in the positional averages and quartiles.
print(paste(
  "The median & modal wage have decreased",
  Before_Dispute_649$Median_wages - After_Dispute_649$Median_wages,
  "and",
  Before_Dispute_649$Modal_wages - After_Dispute_649$Modal_wages
))
print(paste(
  "Q1 has not changed but Q2 has decreased slightly also Q3 has increased",
  After_Dispute_649$Third_quartile - Before_Dispute_649$Third_quartile,
  "."
))

# Coefficient of variation: standard deviation as a percentage of the mean.
cvbefored_649 <- Before_Dispute_649$Standard_distribution /
  Before_Dispute_649$Mean_wages * 100
print(paste("C.V. before dispute = ", cvbefored_649, "%"))
cvafterd_649 <- After_Dispute_649$Standard_distribution /
  After_Dispute_649$Mean_wages * 100
print(paste("C.V. after dispute = ", cvafterd_649, "%"))

print(paste("Measure of skewness are:"))

# Each measure gets its own variable. Bowley's before-dispute value must not
# be stored in pearsonmafterd_649, or Pearson's after-dispute result is lost
# and both affected labels print the wrong number.
pearsonmbefored_649 <- pearson_skew_649(Before_Dispute_649)
pearsonmafterd_649 <- pearson_skew_649(After_Dispute_649)
bowleymbefored_649 <- bowley_skew_649(Before_Dispute_649)
bowleymafterd_649 <- bowley_skew_649(After_Dispute_649)

print(paste("Pearson's Measure before dispute =", pearsonmbefored_649))
print(paste("Pearson's Measure after dispute =", pearsonmafterd_649))
print(paste("Bowley's Measure before dispute =", bowleymbefored_649))
print(paste("Bowley's Measure after dispute =", bowleymafterd_649))
OUTPUT:

Q5. WAP to comment on the nature of skewness:


● Size of items: 10-12 12-14 14-16 16-18 18-20
Frequency 27 20 12 6 3
● Size of items: 10-12 12-14 14-16 16-18 18-20
Frequency 3 6 12 20 27
SOLUTION:
INPUT:
# Q5: nature of skewness of two grouped distributions that mirror each other.
M_649 <- data.frame(
  Size_of_items = c("10-12", "12-14", "14-16", "16-18", "18-20"),
  Frequency = c(27, 20, 12, 6, 3)
)
N_649 <- data.frame(
  Size_of_items = c("10-12", "12-14", "14-16", "16-18", "18-20"),
  Frequency = c(3, 6, 12, 20, 27)
)
print(summary(M_649))
print(summary(N_649))

# Midpoints of "lower-upper" class labels, used as the x positions.
class_midpoints_649 <- function(intervals) {
  limits <- do.call(
    rbind,
    strsplit(as.character(intervals), "-", fixed = TRUE)
  )
  (as.numeric(limits[, 1]) + as.numeric(limits[, 2])) / 2
}

# Pearson's median skewness, 3 * (mean - median) / sd, of a grouped
# distribution. The mean and sd are frequency-weighted over class midpoints
# and the median is interpolated inside the class that contains N / 2.
# Working on the frequency counts themselves would give both mirrored
# distributions the same value, so the sign would have to be forced by hand.
grouped_skewness_649 <- function(intervals, freq) {
  limits <- do.call(
    rbind,
    strsplit(as.character(intervals), "-", fixed = TRUE)
  )
  lower <- as.numeric(limits[, 1])
  upper <- as.numeric(limits[, 2])
  mid <- (lower + upper) / 2
  total <- sum(freq)
  grouped_mean <- sum(freq * mid) / total
  cum <- cumsum(freq)
  k <- which(cum >= total / 2)[1]
  below <- if (k > 1) cum[k - 1] else 0
  grouped_median <- lower[k] + (total / 2 - below) / freq[k] *
    (upper[k] - lower[k])
  grouped_sd <- sqrt(sum(freq * (mid - grouped_mean)^2) / total)
  3 * (grouped_mean - grouped_median) / grouped_sd
}

# First distribution: frequencies fall as size grows, so the long tail is on
# the right and the coefficient is positive.
skewness1_649 <- grouped_skewness_649(M_649$Size_of_items, M_649$Frequency)
print(skewness1_649)
plot(class_midpoints_649(M_649$Size_of_items), M_649$Frequency,
     main = "Positively skewed", xlab = "Size of items", ylab = "Frequency")
lines(class_midpoints_649(M_649$Size_of_items), M_649$Frequency)

# Second distribution: the mirror image, with the long tail on the left.
skewness2_649 <- grouped_skewness_649(N_649$Size_of_items, N_649$Frequency)
print(skewness2_649)
plot(class_midpoints_649(N_649$Size_of_items), N_649$Frequency,
     main = "Negatively skewed", xlab = "Size of items", ylab = "Frequency")
lines(class_midpoints_649(N_649$Size_of_items), N_649$Frequency)
OUTPUT:

Q6. For the following marks of 36 students in an examination, WAP


to exhibit:
● Measures of Frequency
● Measures of Central Tendency
● Measures of Dispersion or Variation
● Measures of Position

SOLUTION:
INPUT:

# Q6: descriptive statistics for the recorded examination marks.
marks_649 <- c(
  55, 75, 65, 30, 90, 55, 40, 50, 60, 80, 80, 76, 95, 75, 55, 45, 65, 80, 30,
  50, 75, 85, 80, 90, 75, 75, 90, 65, 78, 72, 82, 52, 62, 67, 66, 65, 88, 45,
  70
)

# Most frequent value of v; ties resolve to the value that appears first.
getMode <- function(v) {
  distinct <- unique(v)
  counts <- tabulate(match(v, distinct))
  distinct[which.max(counts)]
}

# Measures of frequency: count of each distinct mark.
freq_649 <- table(marks_649)
print(freq_649)

# Measures of central tendency.
print(paste("Mean =", mean(marks_649)))
print(paste("Median =", median(marks_649)))
print(paste("Mode =", getMode(marks_649)))

# Measures of dispersion or variation.
print(paste("Range =", diff(range(marks_649))))
print(paste("Interquartile Range =", IQR(marks_649)))
print(paste("Variance =", var(marks_649)))
print(paste("Standard Deviation =", sd(marks_649)))

# Measures of position: quartiles, summary() output and z-scores.
res_649 <- quantile(marks_649, probs = c(0, 0.25, 0.5, 0.75, 1))
print("Quantile =")
print(res_649)
print("Five Number Summary =")
print(summary(marks_649))
print("Z-Score =")
centered_649 <- marks_649 - mean(marks_649)
print(centered_649 / sd(marks_649))
OUTPUT:

Q7. WAP to determine the types of kurtosis for the data values 0, 3,
4, 1, 2, 3, 0, 2, 1,
3, 2, 0, 2, 2, 3, 2, 5, 2, 3, 999.
SOLUTION:
INPUT:

# Q7: type of kurtosis for a sample with one extreme value (999).
kurtosis_649 <- c(0, 3, 4, 1, 2, 3, 0, 2, 1, 3, 2, 0, 2, 2, 3, 2, 5,
                  2, 3, 999)

# Pearson's moment coefficient of kurtosis, m4 / m2^2, using central moments
# that divide by n. Base R has no kurtosis() function and this script loads no
# package that provides one, so the coefficient is computed directly.
pearson_kurtosis_649 <- function(v) {
  deviation <- v - mean(v)
  mean(deviation^4) / mean(deviation^2)^2
}

kurtosisvalue_649 <- pearson_kurtosis_649(kurtosis_649)
print(kurtosisvalue_649)

# Excess kurtosis compares against the normal distribution's value of 3:
# positive = leptokurtic, negative = platykurtic, zero = mesokurtic.
excesskurtosis_649 <- kurtosisvalue_649 - 3
kurtosistype_649 <- if (excesskurtosis_649 > 0) {
  "Leptokurtic"
} else if (excesskurtosis_649 < 0) {
  "Platykurtic"
} else {
  "Mesokurtic"
}
kurtosissign_649 <- if (excesskurtosis_649 > 0) {
  "positive"
} else if (excesskurtosis_649 < 0) {
  "negative"
} else {
  "zero"
}
print(paste0(
  "Excess Kurtosis = ", excesskurtosis_649, ". It gives a ",
  kurtosissign_649, " value and it is a ", kurtosistype_649,
  " Distribution."
))

OUTPUT:

Q8. A small study is conducted involving 17 infants to investigate the


association between gestational age at birth, measured in weeks, and
birth weight, measured in grams. WAP to calculate the correlation
coefficient and determine whether there is an association between the
two variables.
SOLUTION:
INPUT:
# Q8: association between gestational age (weeks) and birth weight (grams)
# for 17 infants.
infant_649 <- data.frame(
  Infant_ID = 1:17,
  Gestational_Age_weeks = c(34.7, 36, 29.3, 40.1, 35.7, 42.4, 40.3, 37.3,
                            40.9, 38.3, 38.5, 41.4, 39.7, 39.7, 41.1, 38.0,
                            38.7),
  # Every weight is written as one literal: a number split across two lines
  # (334 / 5) is a syntax error, and 17 values are needed to match the IDs.
  Birth_weight_gm = c(1895, 2030, 1440, 2835, 3090, 3827, 3260, 2690, 3285,
                      2920, 3430, 3657, 3685, 3345, 3260, 2680, 2005)
)
print(infant_649)

print(paste(
  "Correlation =",
  cor(infant_649$Gestational_Age_weeks, infant_649$Birth_weight_gm)
))
print(paste(
  "Covariance =",
  cov(infant_649$Gestational_Age_weeks, infant_649$Birth_weight_gm)
))

# Pearson correlation test: a p-value below 0.05 is taken as evidence of a
# linear association between the two variables.
infanttest_649 <- cor.test(
  infant_649$Gestational_Age_weeks,
  infant_649$Birth_weight_gm
)
print(infanttest_649)
if (infanttest_649$p.value < 0.05) {
  print("There is a statistically significant association (p < 0.05).")
} else {
  print("No statistically significant association was found (p >= 0.05).")
}

# Scatter plot with the fitted least-squares line. frame.plot is spelled out
# in full instead of relying on partial matching of "frame".
plot(infant_649$Gestational_Age_weeks, infant_649$Birth_weight_gm,
     main = "Birth Weight vs. Gestational Age", pch = 19,
     frame.plot = FALSE)
abline(
  lm(Birth_weight_gm ~ Gestational_Age_weeks, data = infant_649),
  col = "blue"
)
OUTPUT:

Q9. Nine students held their breath, once after breathing normally and
relaxing for one minute, and once after hyperventilating for one
minute. The table indicates how long (in sec) they were able to hold
their breath. Is there an association between the two variables?
SOLUTION:
INPUT:

# Q9: breath-holding times (seconds) for nine students, once after normal
# breathing and once after hyperventilating.
df_649 <- data.frame(
  Subject = c("A", "B", "C", "D", "E", "F", "G", "H", "I"),
  Normal = c(56, 56, 65, 65, 50, 25, 87, 44, 35),
  Hypervent = c(87, 91, 85, 91, 75, 28, 122, 66, 58)
)
print(df_649)

print(paste("Correlation =", cor(df_649$Normal, df_649$Hypervent)))
print(paste("Covariance =", cov(df_649$Normal, df_649$Hypervent)))

# Answer the question with a Pearson correlation test: a p-value below 0.05
# is taken as evidence of a linear association.
assoctest_649 <- cor.test(df_649$Normal, df_649$Hypervent)
print(assoctest_649)
if (assoctest_649$p.value < 0.05) {
  print("There is a statistically significant association (p < 0.05).")
} else {
  print("No statistically significant association was found (p >= 0.05).")
}

# The title is kept on a single line so no line break appears in the plot.
plot(df_649$Normal, df_649$Hypervent, main = "Hypervent vs. Normal",
     pch = 19, frame.plot = FALSE)
abline(lm(Hypervent ~ Normal, data = df_649), col = "blue")

OUTPUT:
Q10. Find the Standard Score, Quartile, and The Five Number
Summary of the data in the figure below.

SOLUTION:
INPUT:

# Q10: standard scores, quartiles and five-number summary for data given as
# a bar chart of values 0..7 and the number of times each one occurs.
occurance_649 <- c(4, 1, 2, 1, 3, 2, 1, 1)
value_649 <- 0:7
barplot(occurance_649, names.arg = value_649, xlab = "649_Values",
        ylab = "649_Occurences", col = "blue")

# The statistics describe the observations, not the bar heights, so expand
# each value by its occurrence count before summarising.
# NOTE(review): this assumes the figure is a frequency chart, as the barplot
# call above suggests; confirm against the original figure.
observations_649 <- rep(value_649, times = occurance_649)

# Quartiles (minimum, Q1, median, Q3, maximum) of the observations.
res_649 <- quantile(observations_649, probs = c(0, 0.25, 0.5, 0.75, 1))
print(res_649)

# Five-number summary; summary() is also printed because it adds the mean.
fivenumber_649 <- fivenum(observations_649)
print(fivenumber_649)
print(summary(observations_649))

# Standard score of each distinct value relative to all observations.
zscore_649 <- (value_649 - mean(observations_649)) / sd(observations_649)
data_649 <- data.frame(value_649, occurance_649, zscore_649)
colnames(data_649) <- c("Values_649", "Occurences_649", "Standard Score_649")
print(data_649)

OUTPUT:
Q11. Given the following return information, what is the
covariance and correlation coefficient between the return of
Stock A and the return of the market index?

SOLUTION:
INPUT:

# Q11: covariance and correlation between the monthly return of Stock A and
# the monthly return of the market index.
stock_649 <- data.frame(
  Month = 1:5,
  Return_of_stock_A = c(2.3, 2.5, 1.9, 2.4, 2.1),
  Return_of_Market_Index = c(1.3, 5.0, 0.8, 1.9, 1.1)
)
print(stock_649)

# with() evaluates the column names inside the data frame, so the two return
# series can be referenced without repeating the stock_649$ prefix.
print(paste(
  "Correlation Coefficient=",
  with(stock_649, cor(Return_of_stock_A, Return_of_Market_Index))
))
print(paste(
  "Covariance =",
  with(stock_649, cov(Return_of_stock_A, Return_of_Market_Index))
))

OUTPUT:
Q12. Find the covariance and correlation coefficient of eruption
duration and waiting time in the data set faithful. Observe if
there is any linear relationship between the two variables.
SOLUTION:
INPUT:
# Q12: covariance, correlation and linear relationship between eruption
# duration and waiting time in the built-in faithful data set.
data(faithful)

# The data set browser and the spreadsheet viewer only make sense in an
# interactive session, so they are skipped when the script is run in batch.
if (interactive()) {
  data()
  View(faithful)
}

# print() is explicit so the summary also appears when the file is sourced.
print(summary(faithful))

print(paste(
  "Correlation Coefficient=",
  cor(faithful$eruptions, faithful$waiting)
))
print(paste("Covariance =", cov(faithful$eruptions, faithful$waiting)))

# Scatter plot of waiting time against eruption duration, with the
# least-squares line for the same orientation (waiting ~ eruptions).
plot(faithful$eruptions, faithful$waiting, col = "blue",
     main = "Eruptions & Waiting Regression", pch = 19, frame.plot = FALSE,
     xlab = "Faithful$Eruptions", ylab = "Faithful$Waiting")
abline(lm(waiting ~ eruptions, data = faithful), col = "red")

# Linear model of eruption duration on waiting time; the summary reports the
# slope, its significance and R-squared.
print("Linear Relation =")
association_649 <- lm(eruptions ~ waiting, data = faithful)
print(association_649)
print(summary(association_649))

DATASET:

OUTPUT:
{NAME:RUDRASISH MISHRA} | {SECTION:IT-8} | {ROLL NO:1906649}

You might also like