STA3010 SME Assignment
Mark Bilahi M’rabu
2023-10-30
Import the SME Dataset
#1. Updated Data Set
library(readxl)
#install.packages('csv')
library(csv)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#install.packages('tidyverse', repos='http://cran.us.r-project.org')
#install.packages('finalfit', repos='http://cran.us.r-project.org')
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse
2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ ggplot2 3.4.3 ✔ stringr 1.5.0
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ──────────────────────────────────────────
tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all
conflicts to become errors
library(finalfit)
library(lubridate)
Data = read.csv('D:/Documents/School
Documents/USIU/Y2/Y2S3/STA3010/UpdatedSMEData.csv')
View(Data)
#2. Rough Data Set
library(readxl)
Updated_SME_Data <- read_excel("D:/Documents/School
Documents/USIU/Y2/Y2S3/STA3010/Updated SME Data.xlsx")
Data_old = Updated_SME_Data
View(Data_old)
1. To investigate the relationship between the nature of work in an industry &
the employment of DAPs.
#Find out how many enterprises are in each industry
library(ggplot2)
Ind_resp = data.frame(Data$Category)
#View(Ind_resp)
x = data.frame(table(Ind_resp))
#View(x)
Industry = x$Data.Category
No_of_Enterprises = x$Freq
Industry_Figures = ggplot(x, aes(x = Industry, y = No_of_Enterprises)) +
geom_bar(stat = "identity", color = "black") + theme(axis.text.x =
element_text(angle = 90, hjust = 1))
#Industry_Figures
#Find out how many Firms employ DAPs per industry
Industry_Names = c(x$Data.Category)
Industry_Names
## [1] Agriculture Car wash / Laundry
## [3] Consultancy Education
## [5] Electronic repairs / garages Energy
## [7] Fashion Financial Services
## [9] Healthcare ICT
## [11] Manufacturing Real Estate & construction
## [13] Retail Tourism and hospitality
## [15] Transport and Infrastructure Waste management
## 16 Levels: Agriculture Car wash / Laundry Consultancy ... Waste management
#Agriculture
Agri_Data = Data[Data[, 8] == 'Agriculture', ]
#View(Agri_Data)
Agri_Daps = data.frame(table(Agri_Data$HaveDAPs))
#View(Agri_Daps)
Agri_Daps_P = (Agri_Daps$Freq[2] / sum(Agri_Daps$Freq)) * 100
Agri_Daps_P
## [1] 6.521739
#Car Wash / Laundry
CWL_Data = Data[Data[, 8] == "Car wash / Laundry", ]
#View(CWL_Data)
CWL_Daps = data.frame(table(CWL_Data$HaveDAPs))
#View(CWL_Daps)
CWL_Daps_P = (CWL_Daps$Freq[2] / sum(CWL_Daps$Freq)) * 100
CWL_Daps_P
## [1] NA
#Consultancy
Cons_Data = Data[Data[, 8] == "Consultancy", ]
#View(Cons_Data)
Cons_Daps = data.frame(table(Cons_Data$HaveDAPs))
#View(Cons_Daps)
Cons_Daps_P = (Cons_Daps$Freq[2] / sum(Cons_Daps$Freq)) * 100
Cons_Daps_P
## [1] 25
#Education
Edu_Data = Data[Data[, 8] == "Education", ]
#View(Edu_Data)
Edu_Daps = data.frame(table(Edu_Data$HaveDAPs))
#View(Edu_Daps)
Edu_Daps_P = (Edu_Daps$Freq[2] / sum(Edu_Daps$Freq)) * 100
Edu_Daps_P
## [1] 20.83333
#Electronic repairs / garages
ERG_Data = Data[Data[, 8] == "Electronic repairs / garages", ]
#View(ERG_Data)
ERG_Daps = data.frame(table(ERG_Data$HaveDAPs))
#View(ERG_Daps)
ERG_Daps_P = (ERG_Daps$Freq[2] / sum(ERG_Daps$Freq)) * 100
ERG_Daps_P
## [1] NA
#Energy
Enrg_Data = Data[Data[, 8] == "Energy", ]
#View(Enrg_Data)
Enrg_Daps = data.frame(table(Enrg_Data$HaveDAPs))
#View(Enrg_Daps)
Enrg_Daps_P = (Enrg_Daps$Freq[2] / sum(Enrg_Daps$Freq)) * 100
Enrg_Daps_P
## [1] 5.555556
#Fashion
Fshn_Data = Data[Data[, 8] == "Fashion", ]
#View(Fshn_Data)
Fshn_Daps = data.frame(table(Fshn_Data$HaveDAPs))
#View(Fshn_Daps)
Fshn_Daps_P = (Fshn_Daps$Freq[2] / sum(Fshn_Daps$Freq)) * 100
Fshn_Daps_P
## [1] 2.040816
#Financial Services
FinSrv_Data = Data[Data[, 8] == "Financial Services", ]
#View(FinSrv_Data)
FinSrv_Daps = data.frame(table(FinSrv_Data$HaveDAPs))
#View(FinSrv_Daps)
FinSrv_Daps_P = (FinSrv_Daps$Freq[2] / sum(FinSrv_Daps$Freq)) * 100
FinSrv_Daps_P
## [1] 13.46154
#Healthcare
HlthCr_Data = Data[Data[, 8] == "Healthcare", ]
#View(HlthCr_Data)
HlthCr_Daps = data.frame(table(HlthCr_Data$HaveDAPs))
#View(HlthCr_Daps)
HlthCr_Daps_P = (HlthCr_Daps$Freq[2] / sum(HlthCr_Daps$Freq)) * 100
HlthCr_Daps_P
## [1] 10.60606
#ICT
ICT_Data = Data[Data[, 8] == "ICT", ]
#View(ICT_Data)
ICT_Daps = data.frame(table(ICT_Data$HaveDAPs))
#View(ICT_Daps)
ICT_Daps_P = (ICT_Daps$Freq[2] / sum(ICT_Daps$Freq)) * 100
ICT_Daps_P
## [1] 14.13043
#Manufacturing
Mnfct_Data = Data[Data[, 8] == "Manufacturing", ]
#View(Mnfct_Data)
Mnfct_Daps = data.frame(table(Mnfct_Data$HaveDAPs))
#View(Mnfct_Daps)
Mnfct_Daps[2, 2]
## [1] 20
Mnfct_Daps_P = (Mnfct_Daps$Freq[2] / sum(Mnfct_Daps$Freq)) * 100
Mnfct_Daps_P
## [1] 7.633588
#Real Estate & construction
REC_Data = Data[Data[, 8] == "Real Estate & construction", ]
#View(REC_Data)
REC_Daps = data.frame(table(REC_Data$HaveDAPs))
#View(REC_Daps)
REC_Daps_P = (REC_Daps$Freq[2] / sum(REC_Daps$Freq)) * 100
REC_Daps_P
## [1] 25
#Retail
Rtl_Data = Data[Data[, 8] == "Retail", ]
#View(Rtl_Data)
Rtl_Daps = data.frame(table(Rtl_Data$HaveDAPs))
#View(Rtl_Daps)
Rtl_Daps_P = (Rtl_Daps$Freq[2] / sum(Rtl_Daps$Freq)) * 100
Rtl_Daps_P
## [1] 5.96206
#Tourism and hospitality
TrsmHsp_Data = Data[Data[, 8] == "Tourism and hospitality", ]
#View(TrsmHsp_Data)
TrsmHsp_Daps = data.frame(table(TrsmHsp_Data$HaveDAPs))
#View(TrsmHsp_Daps)
TrsmHsp_Daps_P = (TrsmHsp_Daps$Freq[2] / sum(TrsmHsp_Daps$Freq)) * 100
TrsmHsp_Daps_P
## [1] 1.851852
#Transport and Infrastructure
TrnsInf_Data = Data[Data[, 8] == "Transport and Infrastructure", ]
#View(TrnsInf_Data)
TrnsInf_Daps = data.frame(table(TrnsInf_Data$HaveDAPs))
#View(TrnsInf_Daps)
TrnsInf_Daps_P = (TrnsInf_Daps$Freq[2] / sum(TrnsInf_Daps$Freq)) * 100
TrnsInf_Daps_P
## [1] 6.25
#Waste management
WstMng_Data = Data[Data[, 8] == "Waste management", ]
#View(WstMng_Data)
WstMng_Daps = data.frame(table(WstMng_Data$HaveDAPs))
#View(WstMng_Daps)
WstMng_Daps_P = (WstMng_Daps$Freq[2] / sum(WstMng_Daps$Freq)) * 100
WstMng_Daps_P
## [1] NA
#Make Cumulative Table
DAPs_Responses = data.frame(cbind(Agri_Daps_P, CWL_Daps_P, Cons_Daps_P,
Edu_Daps_P, ERG_Daps_P, Enrg_Daps_P, Fshn_Daps_P, FinSrv_Daps_P,
HlthCr_Daps_P, ICT_Daps_P, Mnfct_Daps_P, REC_Daps_P, Rtl_Daps_P,
TrsmHsp_Daps_P, TrnsInf_Daps_P, WstMng_Daps_P))
#View(DAPs_Responses)
#Plot Final Visualisation
labels = c('Agriculture', 'Car Wash & Laundry', 'Consulting', 'Education',
'Electronic Repairs & Garages', 'Energy', 'Fashion', 'Financial Services',
'Healthcare', 'ICT', 'Manufacturing', 'Real Estate & Construction', 'Retail',
'Tourism & Hospitality', 'Transport & Infrastructure', 'Waste Management')
Percentages = as.numeric(unlist(DAPs_Responses))
plot_data = data.frame(labels, Percentages)
DAPs_Percentages = ggplot(plot_data, aes(x = Percentages, y = labels)) +
geom_bar(stat = "identity", fill = "black") +
theme(axis.text.x = element_text(angle = 0, hjust = 1)) +
labs(x = "Percentage of firms with DAPs", y = "Category")
Industry_Figures
DAPs_Percentages
## Warning: Removed 3 rows containing missing values (`position_stack()`).
#Null = There is no association between the percentage of employment of DAPs
& the industry
#Alternate = There is an association between the percentage of employment of
DAPs & the industry
dapa = DAPs_Responses %>% replace(is.na(.), 0)
dapa
## Agri_Daps_P CWL_Daps_P Cons_Daps_P Edu_Daps_P ERG_Daps_P Enrg_Daps_P
## 1 6.521739 0 25 20.83333 0 5.555556
## Fshn_Daps_P FinSrv_Daps_P HlthCr_Daps_P ICT_Daps_P Mnfct_Daps_P
REC_Daps_P
## 1 2.040816 13.46154 10.60606 14.13043 7.633588
25
## Rtl_Daps_P TrsmHsp_Daps_P TrnsInf_Daps_P WstMng_Daps_P
## 1 5.96206 1.851852 6.25 0
DAPsTest = chisq.test(dapa)
summary(DAPsTest)
## Length Class Mode
## statistic 1 -none- numeric
## parameter 1 -none- numeric
## p.value 1 -none- numeric
## method 1 -none- character
## data.name 1 -none- character
## observed 16 -none- numeric
## expected 16 -none- numeric
## residuals 16 -none- numeric
## stdres 16 -none- numeric
#P-value is 1 thus there is no significant evidence to reject the Null
Hypothesis
##Corrections
#Making contingency table w DAPs counts
#Note: Industries w <5% DAPs employement are merged into 1 (Fshn and TrsmHsp)
# Industries w no DAPs employment have been removed (CWL, ERG, WstMng)
DAPs_Table = data.frame(cbind(Agri_Daps, Cons_Daps[, 2], Edu_Daps[, 2],
Enrg_Daps[, 2], Fshn_Daps[, 2], FinSrv_Daps[, 2], HlthCr_Daps[, 2],
ICT_Daps[, 2], Mnfct_Daps[, 2], REC_Daps[, 2], Rtl_Daps[, 2], TrsmHsp_Daps[,
2], TrnsInf_Daps[, 2]))
colnames(DAPs_Table) = c('Has DAPs', 'Agri', 'Cons', 'Edu', 'Enrg', 'Fshn',
'FinSrv', 'HlthCr', 'ICT', 'Mnfct', 'REC', 'Rtl', 'TrsmHsp', 'TrnsInf')
dt = DAPs_Table
t = 'Totals'
Totals = data.frame(t, sum(dt[1:2, 2]), sum(dt[1:2, 3]), sum(dt[1:2, 4]),
sum(dt[1:2, 5]), sum(dt[1:2, 6]), sum(dt[1:2, 7]), sum(dt[1:2, 8]),
sum(dt[1:2, 9]), sum(dt[1:2, 10]), sum(dt[1:2, 11]), sum(dt[1:2, 12]),
sum(dt[1:2, 13]), sum(dt[1:2, 14]))
colnames(Totals) = c('Has DAPs', 'Agri', 'Cons', 'Edu', 'Enrg', 'Fshn',
'FinSrv', 'HlthCr', 'ICT', 'Mnfct', 'REC', 'Rtl', 'TrsmHsp', 'TrnsInf')
DAPs_Table = rbind(DAPs_Table, Totals[1,1:14])
View(DAPs_Table)
#Merging Fashion with Tourism & Hospitality
DAPs_Table$'Fshn & TrsmHsp' = DAPs_Table$Fshn + DAPs_Table$TrsmHsp
DAPs_Table = DAPs_Table[, -c(6, 13)]
DAPs_Table$'Totals' = rowSums(DAPs_Table[, 2:13])
#Re-running the Chisqd Test with the new Contingency Table
DAPsTest = chisq.test(DAPs_Table[1:2, 2:ncol(DAPs_Table)])
## Warning in chisq.test(DAPs_Table[1:2, 2:ncol(DAPs_Table)]): Chi-squared
## approximation may be incorrect
summary(DAPsTest)
## Length Class Mode
## statistic 1 -none- numeric
## parameter 1 -none- numeric
## p.value 1 -none- numeric
## method 1 -none- character
## data.name 1 -none- character
## observed 26 -none- numeric
## expected 26 -none- numeric
## residuals 26 -none- numeric
## stdres 26 -none- numeric
To Investigate the Funding Requirements of different Industries
#Install Neccessary Packages
#install.packages('nnet', repos='http://cran.us.r-project.org')
library(nnet)
#Find Out the various funding sources per Industry
#Agriculture
Agri_Data_Old = Data_old[Data_old[, 8] == '6', ]
Agri_Funding = data.frame(Agri_Data[, 9])
Agri_Funding_Vals = data.frame(Agri_Data_Old[, 9])
Agri_Funding = cbind(Agri_Funding, Agri_Funding_Vals)
colnames(Agri_Funding) = c('Source', 'Source Val')
#View(Agri_Funding)
Agri_Funding_Count = table(Agri_Funding$Source)
Agri_Funding_Count
##
## Family support Loan from financial institutions
## 4 4
## Own savings
## 26
Agri_Funding_P = prop.table(Agri_Funding_Count)*100
plot(Agri_Funding_P)
#Car Wash / Laundry
CWL_Data_Old = Data_old[Data_old[, 8] == '14', ]
CWL_Funding = data.frame(CWL_Data[, 9])
CWL_Funding_Vals = data.frame(CWL_Data_Old[, 9])
CWL_Funding = cbind(CWL_Funding, CWL_Funding_Vals)
colnames(CWL_Funding) = c('Source', 'Source Val')
#View(CWL_Funding)
CWL_Funding_Count = table(CWL_Funding$Source)
CWL_Funding_Count
##
## Own savings
## 8
CWL_Funding_P = prop.table(CWL_Funding_Count, margin = 1)*100
plot(CWL_Funding_P)
#Consultancy
Cons_Data_Old = Data_old[Data_old[, 8] == '15', ]
Cons_Funding = data.frame(Cons_Data[, 9])
Cons_Funding_Vals = data.frame(Cons_Data_Old[, 9])
Cons_Funding = cbind(Cons_Funding, Cons_Funding_Vals)
colnames(Cons_Funding) = c('Source', 'Source Val')
#View(Cons_Funding)
Cons_Funding_Count = table(Cons_Funding$Source)
Cons_Funding_Count
##
## Family support Own savings
## 1 5
Cons_Funding_P = prop.table(Cons_Funding_Count)*100
plot(Cons_Funding_P)
#Education
Edu_Data_Old = Data_old[Data_old[, 8] == '10', ]
Edu_Funding = data.frame(Edu_Data[, 9])
Edu_Funding_Vals = data.frame(Edu_Data_Old[, 9])
Edu_Funding = cbind(Edu_Funding, Edu_Funding_Vals)
colnames(Edu_Funding) = c('Source', 'Source Val')
#View(Edu_Funding)
Edu_Funding_Count = table(Edu_Funding$Source)
Edu_Funding_Count
##
## Family support Grants Own savings
## 1 1 11
Edu_Funding_P = prop.table(Edu_Funding_Count)*100
plot(Edu_Funding_P)
#Electronic repairs / garages
ERG_Data_Old = Data_old[Data_old[, 8] == '16', ]
ERG_Funding = data.frame(ERG_Data[, 9])
ERG_Funding_Vals = data.frame(ERG_Data_Old[, 9])
ERG_Funding = cbind(ERG_Funding, ERG_Funding_Vals)
colnames(ERG_Funding) = c('Source', 'Source Val')
#View(ERG_Funding)
ERG_Funding_Count = table(ERG_Funding$Source)
ERG_Funding_Count
##
## Own savings
## 5
ERG_Funding_P = prop.table(ERG_Funding_Count)*100
plot(ERG_Funding_P)
#Energy
Enrg_Data_Old = Data_old[Data_old[, 8] == '1', ]
Enrg_Funding = data.frame(Enrg_Data[, 9])
Enrg_Funding_Vals = data.frame(Enrg_Data_Old[, 9])
Enrg_Funding = cbind(Enrg_Funding, Enrg_Funding_Vals)
colnames(Enrg_Funding) = c('Source', 'Source Val')
#View(Enrg_Funding)
Enrg_Funding_Count = table(Enrg_Funding$Source)
Enrg_Funding_Count
##
## Family support Own savings
## 1 9
Enrg_Funding_P = prop.table(Enrg_Funding_Count)*100
plot(Enrg_Funding_P)
#Fashion
Fshn_Data_Old = Data_old[Data_old[, 8] == '13', ]
Fshn_Funding = data.frame(Fshn_Data[, 9])
Fshn_Funding_Vals = data.frame(Fshn_Data_Old[, 9])
Fshn_Funding = cbind(Fshn_Funding, Fshn_Funding_Vals)
colnames(Fshn_Funding) = c('Source', 'Source Val')
#View(Fshn_Funding)
Fshn_Funding_Count = table(Fshn_Funding$Source)
Fshn_Funding_Count
##
## Family support Grants
## 1 2
## Loan from financial institutions Own savings
## 2 32
Fshn_Funding_P = prop.table(Fshn_Funding_Count)*100
plot(Fshn_Funding_P)
#Financial Services
FinSrv_Data_Old = Data_old[Data_old[, 8] == '2', ]
FinSrv_Funding = data.frame(FinSrv_Data[, 9])
FinSrv_Funding_Vals = data.frame(FinSrv_Data_Old[, 9])
FinSrv_Funding = cbind(FinSrv_Funding, FinSrv_Funding_Vals)
colnames(FinSrv_Funding) = c('Source', 'Source Val')
#View(FinSrv_Funding)
FinSrv_Funding_Count = table(FinSrv_Funding$Source)
FinSrv_Funding_Count
##
## Family support Loan from financial institutions
## 1 5
## Own savings Share sale (offer)
## 23 2
FinSrv_Funding_P = prop.table(FinSrv_Funding_Count)*100
plot(FinSrv_Funding_P)
#Healthcare
HlthCr_Data_Old = Data_old[Data_old[, 8] == '9', ]
HlthCr_Funding = data.frame(HlthCr_Data[, 9])
HlthCr_Funding_Vals = data.frame(HlthCr_Data_Old[, 9])
HlthCr_Funding = cbind(HlthCr_Funding, HlthCr_Funding_Vals)
colnames(HlthCr_Funding) = c('Source', 'Source Val')
#View(HlthCr_Funding)
HlthCr_Funding_Count = table(HlthCr_Funding$Source)
HlthCr_Funding_Count
##
## Family support Grants
## 6 1
## Loan from financial institutions Own savings
## 4 25
HlthCr_Funding_P = prop.table(HlthCr_Funding_Count)*100
plot(HlthCr_Funding_P)
#ICT
ICT_Data_Old = Data_old[Data_old[, 8] == '3', ]
ICT_Funding = data.frame(ICT_Data[, 9])
ICT_Funding_Vals = data.frame(ICT_Data_Old[, 9])
ICT_Funding = cbind(ICT_Funding, ICT_Funding_Vals)
colnames(ICT_Funding) = c('Source', 'Source Val')
#View(ICT_Funding)
ICT_Funding_Count = table(ICT_Funding$Source)
ICT_Funding_Count
##
## Family support Grants
## 4 1
## Loan from financial institutions Own savings
## 3 54
## Share sale (offer)
## 1
ICT_Funding_P = prop.table(ICT_Funding_Count)*100
plot(ICT_Funding_P)
#Manufacturing
Mnfct_Data_Old = Data_old[Data_old[, 8] == '4', ]
Mnfct_Funding = data.frame(Mnfct_Data[, 9])
Mnfct_Funding_Vals = data.frame(Mnfct_Data_Old[, 9])
Mnfct_Funding = cbind(Mnfct_Funding, Mnfct_Funding_Vals)
colnames(Mnfct_Funding) = c('Source', 'Source Val')
#View(Mnfct_Funding)
Mnfct_Funding_Count = table(Mnfct_Funding$Source)
Mnfct_Funding_Count
##
## Family support Grants
## 17 1
## Loan from financial institutions Own savings
## 10 155
Mnfct_Funding_P = prop.table(Mnfct_Funding_Count)*100
plot(Mnfct_Funding_P)
#Real Estate & construction
REC_Data_Old = Data_old[Data_old[, 8] == '8', ]
REC_Funding = data.frame(REC_Data[, 9])
REC_Funding_Vals = data.frame(REC_Data_Old[, 9])
REC_Funding = cbind(REC_Funding, REC_Funding_Vals)
colnames(REC_Funding) = c('Source', 'Source Val')
#View(REC_Funding)
REC_Funding_Count = table(REC_Funding$Source)
REC_Funding_Count
##
## Family support Loan from financial institutions
## 1 1
## Own savings
## 20
REC_Funding_P = prop.table(REC_Funding_Count)*100
plot(REC_Funding_P)
#Retail
Rtl_Data_Old = Data_old[Data_old[, 8] == '11', ]
Rtl_Funding = data.frame(Rtl_Data[, 9])
Rtl_Funding_Vals = data.frame(Rtl_Data_Old[, 9])
Rtl_Funding = cbind(Rtl_Funding, Rtl_Funding_Vals)
colnames(Rtl_Funding) = c('Source', 'Source Val')
#View(Rtl_Funding)
Rtl_Funding_Count = table(Rtl_Funding$Source)
Rtl_Funding_Count
##
## Family support Grants
## 17 2
## Loan from financial institutions Own savings
## 23 218
Rtl_Funding_P = prop.table(Rtl_Funding_Count)*100
plot(Rtl_Funding_P)
#Tourism and hospitality
TrsmHsp_Data_Old = Data_old[Data_old[, 8] == '7', ]
TrsmHsp_Funding = data.frame(TrsmHsp_Data[, 9])
TrsmHsp_Funding_Vals = data.frame(TrsmHsp_Data_Old[, 9])
TrsmHsp_Funding = cbind(TrsmHsp_Funding, TrsmHsp_Funding_Vals)
colnames(TrsmHsp_Funding) = c('Source', 'Source Val')
#View(TrsmHsp_Funding)
TrsmHsp_Funding_Count = table(TrsmHsp_Funding$Source)
TrsmHsp_Funding_Count
##
## Family support Grants
## 2 1
## Loan from financial institutions Own savings
## 2 59
## Share sale (offer)
## 1
TrsmHsp_Funding_P = prop.table(TrsmHsp_Funding_Count)*100
plot(TrsmHsp_Funding_P)
#Transport and Infrastructure
TrnsInf_Data_Old = Data_old[Data_old[, 8] == '5', ]
TrnsInf_Funding = data.frame(TrnsInf_Data[, 9])
TrnsInf_Funding_Vals = data.frame(TrnsInf_Data_Old[, 9])
TrnsInf_Funding = cbind(TrnsInf_Funding, TrnsInf_Funding_Vals)
colnames(TrnsInf_Funding) = c('Source', 'Source Val')
#View(TrnsInf_Funding)
TrnsInf_Funding_Count = table(TrnsInf_Funding$Source)
TrnsInf_Funding_Count
##
## Grants Loan from financial institutions
## 1 4
## Own savings
## 42
TrnsInf_Funding_P = prop.table(TrnsInf_Funding_Count)*100
plot(TrnsInf_Funding_P)
#Waste management
WstMng_Data_Old = Data_old[Data_old[, 8] == '12', ]
WstMng_Funding = data.frame(WstMng_Data[, 9])
WstMng_Funding_Vals = data.frame(WstMng_Data_Old[, 9])
WstMng_Funding = cbind(WstMng_Funding, WstMng_Funding_Vals)
colnames(WstMng_Funding) = c('Source', 'Source Val')
#View(WstMng_Funding)
WstMng_Funding_Count = table(WstMng_Funding$Source)
WstMng_Funding_Count
##
## Own savings
## 2
WstMng_Funding_P = prop.table(WstMng_Funding_Count)*100
plot(WstMng_Funding_P)
#Performing a multinomial logistic regression for multi-class classification
Cat = na.omit(Data$Category[1:822])
Cap = na.omit(Data$SourceofinCapita)
fundingmodel = multinom(formula = Cat ~ Cap, data = Data)
## # weights: 96 (75 variable)
## initial value 2279.067930
## iter 10 value 1802.458317
## iter 20 value 1790.984097
## iter 30 value 1788.457734
## iter 40 value 1787.259380
## iter 50 value 1787.237268
## final value 1787.234892
## converged
summary(fundingmodel)
## Call:
## multinom(formula = Cat ~ Cap, data = Data)
##
## Coefficients:
## (Intercept) CapGrants
## Car wash / Laundry 0.0001486747 -7.983588
## Consultancy -0.6935349790 -5.677565
## Education -0.6935005243 -5.412963
## Electronic repairs / garages -0.6937943841 -4.819626
## Energy -0.6935362021 -5.445360
## Fashion -0.6939746152 -5.300323
## Financial Services -0.0004896646 19.032931
## Healthcare 0.4056077470 -9.627824
## ICT 1.0986967002 17.933288
## Manufacturing 1.7917588852 18.338995
## Real Estate & construction -0.0001466879 -8.017865
## Retail 1.7047185680 18.426030
## Tourism and hospitality 0.4058089649 19.319288
## Transport and Infrastructure 1.2528192334 -14.871784
## Waste management -0.6924409287 -4.537912
## CapLoan from financial institutions CapOwn
savings
## Car wash / Laundry -2.462594e+01 -
0.74733545
## Consultancy 6.583101e-04 -
0.86443394
## Education 5.163232e-04
0.45712919
## Electronic repairs / garages -2.053761e+01 -
0.64101657
## Energy 5.657473e-04
0.31408001
## Fashion 6.940382e-01
1.00764784
## Financial Services 6.473957e-04
0.10057626
## Healthcare 5.106828e-01
0.26087913
## ICT -8.945900e-05
0.01729476
## Manufacturing -8.698860e-02
0.47329725
## Real Estate & construction -6.927538e-01
0.19121261
## Retail 5.465836e-01
0.51563845
## Tourism and hospitality -4.200380e-04
0.77689053
## Transport and Infrastructure -5.596632e-01 -
0.43603662
## Waste management -2.197151e+01 -
2.25175154
## CapShare sale (offer)
## Car wash / Laundry -15.413055
## Consultancy -12.230602
## Education -12.029522
## Electronic repairs / garages -12.305834
## Energy -12.048448
## Fashion -11.890734
## Financial Services -14.911699
## Healthcare -16.756964
## ICT -1.098568
## Manufacturing -1.791510
## Real Estate & construction -15.122468
## Retail -1.704474
## Tourism and hospitality -16.448112
## Transport and Infrastructure -21.054996
## Waste management -12.573154
##
## Std. Errors:
## (Intercept) CapGrants
## Car wash / Laundry 0.9999531 1.966572e-12
## Consultancy 1.2248903 3.376246e-11
## Education 1.2248758 4.361043e-11
## Electronic repairs / garages 1.2249964 4.972862e-11
## Energy 1.2248904 4.226759e-11
## Fashion 1.2250690 4.786644e-11
## Financial Services 1.0001127 1.036138e+00
## Healthcare 0.9128357 3.918896e-13
## ICT 0.8164792 9.347214e-01
## Manufacturing 0.7637541 6.508682e-01
## Real Estate & construction 1.0000269 2.423529e-12
## Retail 0.7686993 6.543528e-01
## Tourism and hospitality 0.9127990 8.207396e-01
## Transport and Infrastructure 0.8017701 3.261946e-15
## Waste management 1.2244443 7.081200e-11
## CapLoan from financial institutions CapOwn
savings
## Car wash / Laundry 9.478665e-10
1.0787250
## Consultancy 1.732087e+00
1.3427387
## Education 1.732108e+00
1.2726425
## Electronic repairs / garages 2.914627e-08
1.3240893
## Energy 1.732114e+00
1.2766793
## Fashion 1.581388e+00
1.2617003
## Financial Services 1.414275e+00
1.0490362
## Healthcare 1.238260e+00
0.9554728
## ICT 1.154696e+00
0.8582030
## Manufacturing 1.083625e+00
0.8008852
## Real Estate & construction 1.581090e+00
1.0469787
## Retail 1.069366e+00
0.8057575
## Tourism and hospitality 1.290962e+00
0.9497175
## Transport and Infrastructure 1.180192e+00
0.8477753
## Waste management 6.966057e-09
1.5973918
## CapShare sale (offer)
## Car wash / Laundry 4.156175e-06
## Consultancy 5.466233e-05
## Education 6.681540e-05
## Electronic repairs / garages 4.828044e-05
## Energy 6.556195e-05
## Fashion 7.665163e-05
## Financial Services 7.017229e-06
## Healthcare 1.629436e-06
## ICT 1.633033e+00
## Manufacturing 1.607283e+00
## Real Estate & construction 5.680393e-06
## Retail 1.609640e+00
## Tourism and hospitality 2.220378e-06
## Transport and Infrastructure 5.060585e-08
## Waste management 3.706534e-05
##
## Residual Deviance: 3574.47
## AIC: 3724.47
## Corrections
# Isolating the 3 main industries by SME count & regressing them against the
other industries
# Retail, Manufacturing & ICT
#Transposing the Funding Percentage tables & creating a new Data Set
(Isolated)
RetailFunding = data.frame(t(Rtl_Funding_P))
RetailFunding = RetailFunding
r5 = data.frame(Var1 = 'A', Var2 = 'x', Freq = 0)
RetailFunding = rbind(RetailFunding, r5)
View(RetailFunding)
ManufacFunding = data.frame(t(Mnfct_Funding_P))
m5 = data.frame(Var1 = 'A', Var2 = 'x', Freq = 0)
ManufacFunding = rbind(ManufacFunding, m5)
View(ManufacFunding)
ICTFunding = data.frame(t(ICT_Funding_P))
View(ICTFunding)
FundingData = cbind(RetailFunding, ManufacFunding, ICTFunding)
View(FundingData)
FundingData = FundingData[, -c(1, 4, 7)]
View(FundingData)
colnames(FundingData) = c('Retail', 'Percent', 'Manufacturing', 'Percent',
'ICT', 'Percent')
#Funding Data Set for other industries
#Agri
AgriFunding = data.frame(t(Agri_Funding_P))
#View(AgriFunding)
a45 = data.frame(Var1 = c('A', 'A'), Var2 = c('x', 'x'), Freq = c(0, 0))
AgriFunding = rbind(AgriFunding, a45)
AgriFunding = AgriFunding[, -c(1)]
#CWL
CWLFunding = data.frame(t(CWL_Funding_P))
#View(CWLFunding)
cw2345 = data.frame(Var1 = c('A', 'A', 'A', 'A'), Var2 = c('x', 'x', 'x',
'x'), Freq = c(0, 0, 0, 0))
CWLFunding = rbind(CWLFunding, cw2345)
CWLFunding = CWLFunding[, -c(1)]
#Cons
ConsFunding = data.frame(t(Cons_Funding_P))
#View(ConsFunding)
cns345 = data.frame(Var1 = c('A', 'A', 'A'), Var2 = c('x', 'x', 'x'), Freq =
c(0, 0, 0))
ConsFunding = rbind(ConsFunding, cns345)
ConsFunding = ConsFunding[, -c(1)]
#Edu
EduFunding = data.frame(t(Edu_Funding_P))
#View(EduFunding)
ed45 = data.frame(Var1 = c('A', 'A'), Var2 = c('x', 'x'), Freq = c(0, 0))
EduFunding = rbind(EduFunding, ed45)
EduFunding = EduFunding[, -c(1)]
#ERG
ERGFunding = data.frame(t(ERG_Funding_P))
#View(ERGFunding)
erg2345 = data.frame(Var1 = c('A', 'A', 'A', 'A'), Var2 = c('x', 'x', 'x',
'x'), Freq = c(0, 0, 0, 0))
ERGFunding = rbind(ERGFunding, erg2345)
ERGFunding = ERGFunding[, -c(1)]
#Enrg
EnrgFunding = data.frame(t(Enrg_Funding_P))
#View(EnrgFunding)
enrgs345 = data.frame(Var1 = c('A', 'A', 'A'), Var2 = c('x', 'x', 'x'), Freq
= c(0, 0, 0))
EnrgFunding = rbind(EnrgFunding, enrgs345)
EnrgFunding = EnrgFunding[, -c(1)]
#Fshn
FshnFunding = data.frame(t(Fshn_Funding_P))
#View(FshnFunding)
fshn5 = data.frame(Var1 = 'A', Var2 = 'x', Freq = 0)
FshnFunding = rbind(FshnFunding, fshn5)
FshnFunding = FshnFunding[, -c(1)]
#FinSrv
FinSrvFunding = data.frame(t(FinSrv_Funding_P))
#View(FinSrvFunding)
finsrv5 = data.frame(Var1 = 'A', Var2 = 'x', Freq = 0)
FinSrvFunding = rbind(FinSrvFunding, finsrv5)
FinSrvFunding = FinSrvFunding[, -c(1)]
#HlthCr
HlthCrFunding = data.frame(t(HlthCr_Funding_P))
#View(HlthCrFunding)
hlthcr5 = data.frame(Var1 = 'A', Var2 = 'x', Freq = 0)
HlthCrFunding = rbind(HlthCrFunding, hlthcr5)
HlthCrFunding = HlthCrFunding[, -c(1)]
#REC
RECFunding = data.frame(t(REC_Funding_P))
#View(RECFunding)
rec45 = data.frame(Var1 = c('A', 'A'), Var2 = c('x', 'x'), Freq = c(0, 0))
RECFunding = rbind(RECFunding, rec45)
RECFunding = RECFunding[, -c(1)]
#TrsmHsp
TrsmHspFunding = data.frame(t(TrsmHsp_Funding_P))
#View(TrsmHspFunding)
TrsmHspFunding = TrsmHspFunding[, -c(1)]
#TrnsInf
TrnsInfFunding = data.frame(t(TrnsInf_Funding_P))
#View(TrnsInfFunding)
trnsinf45 = data.frame(Var1 = c('A', 'A'), Var2 = c('x', 'x'), Freq = c(0,
0))
TrnsInfFunding = rbind(TrnsInfFunding, trnsinf45)
TrnsInfFunding = TrnsInfFunding[, -c(1)]
#WstMng
WstMngFunding = data.frame(t(WstMng_Funding_P))
#View(WstMngFunding)
wstmng2345 = data.frame(Var1 = c('A', 'A', 'A', 'A'), Var2 = c('x', 'x', 'x',
'x'), Freq = c(0, 0, 0, 0))
WstMngFunding = rbind(WstMngFunding, wstmng2345)
WstMngFunding = WstMngFunding[, -c(1)]
#Renaming the Columns
colnames(AgriFunding) = c('Agri', 'Percent')
colnames(CWLFunding) = c('CWL', 'Percent')
colnames(ConsFunding) = c('Cons', 'Percent')
colnames(EduFunding) = c('Edu', 'Percent')
colnames(ERGFunding) =c('ERG', 'Percent')
colnames(EnrgFunding) =c('Enrg', 'Percent')
colnames(FshnFunding) = c('Fshn', 'Percent')
colnames(FinSrvFunding) = c('FinSrv', 'Percent')
colnames(HlthCrFunding) = c('HlthCr', 'Percent')
colnames(RECFunding) = c('REC', 'Percent')
colnames(TrsmHspFunding) = c('TrsmHsp', 'Percent')
colnames(TrnsInfFunding) = c('TrnsInf', 'Percent')
colnames(WstMngFunding) = c('WstMng', 'Percent')
#Combining them into 1 Data Set
FundingData2 = cbind(AgriFunding, CWLFunding, ConsFunding, EduFunding,
ERGFunding, EnrgFunding, FshnFunding, FinSrvFunding, HlthCrFunding,
RECFunding, TrsmHspFunding, TrnsInfFunding, WstMngFunding)
View(FundingData2)
To Evaluate if Staff’s Access to New Tech is a Significant Predictor of a Firm’s
Industry
Industries = na.omit(Data_old$Category)
Women_Access = na.omit(Data_old$womenaccesstonewtech)
Men_Access = na.omit(Data_old$Menaccesstonewtech)
Youth_Access = na.omit(Data_old$YouthaccesstonewTech)
Women_mean = round(mean(Women_Access))
Men_mean = round(mean(Men_Access))
Youth_mean = round(mean(Youth_Access))
Average_Access = data.frame(Women_mean, Men_mean, Youth_mean)
colnames(Average_Access) = c('Women Avg', 'Men Avg', 'Youth Avg')
rownames(Average_Access) = c('Level of Access')
#3 = Moderate
#4 = High
Average_Access
## Women Avg Men Avg Youth Avg
## Level of Access 3 4 4
model1 = glm(Industries~Women_Access+Men_Access+Youth_Access, data =
Data_old, family = poisson)
model2 = glm(Industries~Women_Access+Men_Access, data = Data_old, family =
poisson)
xaxis = c('Women Avg', 'Men Avg', 'Youth Avg')
yaxis = as.numeric(unlist(Average_Access))
plotplot = data.frame(xaxis, yaxis)
AvgAcc_plot = ggplot(plotplot, aes_string(x = "xaxis", y = "yaxis")) +
geom_bar(stat = "identity", fill = "black") +
theme(axis.text.x = element_text(angle = 0, hjust = 1)) +
labs(x = 'Groups', y = 'Level of Access')
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
AvgAcc_plot
summary(model1)
##
## Call:
## glm(formula = Industries ~ Women_Access + Men_Access + Youth_Access,
## family = poisson, data = Data_old)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.96696 0.05995 32.811 < 2e-16 ***
## Women_Access 0.05039 0.01399 3.602 0.000315 ***
## Men_Access -0.05902 0.01543 -3.824 0.000131 ***
## Youth_Access 0.02105 0.01487 1.416 0.156880
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 2275.4 on 1214 degrees of freedom
## Residual deviance: 2254.8 on 1211 degrees of freedom
## AIC: 6795.4
##
## Number of Fisher Scoring iterations: 4
summary(model2)
##
## Call:
## glm(formula = Industries ~ Women_Access + Men_Access, family = poisson,
## data = Data_old)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.01126 0.05100 39.436 < 2e-16 ***
## Women_Access 0.05372 0.01381 3.891 9.99e-05 ***
## Men_Access -0.05162 0.01455 -3.548 0.000388 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 2275.4 on 1214 degrees of freedom
## Residual deviance: 2256.8 on 1212 degrees of freedom
## AIC: 6795.4
##
## Number of Fisher Scoring iterations: 4