You are on page 1 of 4

> mydata$angajabilitate <- as.factor(mydata$angajabilitate)
> mydata$mediul <- as.factor(mydata$mediul)
> mydata$varsta <- as.factor(mydata$varsta)
> str(mydata)
'data.frame': 51 obs. of 8 variables:
$ Nr..Intrebare : int 1 2 3 4 5 6 7 8 9 10 ...
$ V7 : Factor w/ 2 levels "1","2": 1 1 1 1 2 2 1 1 2 2 ...
$ varsta : Factor w/ 6 levels "1","2","3","4",..: 2 2 6 3 1 3 3 2 2 2 ...
$ genul : Factor w/ 2 levels "1","2": 2 1 1 2 1 2 2 2 2 2 ...
$ mediul : Factor w/ 2 levels "1","2": 1 1 2 1 1 1 1 1 1 1 ...
$ studii : int 3 4 3 4 3 3 4 3 3 4 ...
$ angajabilitate: Factor w/ 3 levels "1","3","4": 1 1 1 1 1 1 1 2 2 1 ...
$ venit : int 5 4 4 5 4 4 4 4 4 4 ...
> mydata$venit <- as.factor(mydata$venit)
> summary(mydata)
 Nr..Intrebare  V7     varsta genul  mediul     studii
 Min.   : 1.0   1:36   1: 2   1:29   1:41   Min.   :3.000
 1st Qu.:13.5   2:15   2:28   2:22   2:10   1st Qu.:3.000
 Median :26.0          3:13                 Median :4.000
 Mean   :26.0          4: 2                 Mean   :3.686
 3rd Qu.:38.5          5: 5                 3rd Qu.:4.000
 Max.   :51.0          6: 1                 Max.   :5.000
 angajabilitate venit
 1:39           1: 2
 3:11           3: 5
 4: 1           4:24
                5:16
                6: 2
                7: 2
> # Modelul de baza:
> table(mydata$V7)/nrow(mydata)

1 2
0.7058824 0.2941176
> # Modelul de baza:
> table(mydata$V7)/nrow(mydata)

1 2
0.7058824 0.2941176
> odds <- function(p){
+ odds = p/(1-p)
+ }
> p = c(0.1, 0.2, 0.4, 0.5, 0.7, 0.8, 0.9)
> a<- odds(p)
> a
[1] 0.1111111 0.2500000 0.6666667 1.0000000 2.3333333
[6] 4.0000000 9.0000000
> log(a)
[1] -2.1972246 -1.3862944 -0.4054651 0.0000000 0.8472979
[6] 1.3862944 2.1972246
> # Instalarea si încarcarea pachetului caTools
> install.packages("caTools")
Installing package into ‘C:/Users/student/Documents/R/win-library/3.5’
(as ‘lib’ is unspecified)
also installing the dependency ‘bitops’
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/bitops_1.0-6.zip'
Content type 'application/zip' length 38894 bytes (37 KB)
downloaded 37 KB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/caTools_1.17.1.3.zip'
Content type 'application/zip' length 329552 bytes (321 KB)
downloaded 321 KB

package ‘bitops’ successfully unpacked and MD5 sums checked


package ‘caTools’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in


C:\Users\student\AppData\Local\Temp\RtmpUDOQyw\downloaded_packages
> library(caTools)
> # Împartirea setului de date în training and test set
> set.seed(123) # Pentru ca de fiecare data sa obtineti acelasi lucru :-)
> # Constructia celor doua submultimi
> train = subset(mydata, split==TRUE)
Error in split == TRUE :
comparison (1) is possible only for atomic and list types
> split = sample.split(mydata$V7, SplitRatio = 0.80)
> test = subset(mydata, split==FALSE)
> # Modelul de regresie logistica
> mydataLog <- glm(V7 ~ varsta + genul + angajabilitate + venit + mediul, data = train, family="binomial")
Error in is.data.frame(data) : object 'train' not found
> # Ce am obtinut?
> summary(mydataLog)
Error in summary(mydataLog) : object 'mydataLog' not found
> # Predictia pe multimea test
> predictTest = predict(mydataLog, type="response", newdata=test)
Error in predict(mydataLog, type = "response", newdata = test) :
object 'mydataLog' not found
> predictTest
Error: object 'predictTest' not found
> # Confusion matrix - prag de 0.5
> table(test$V7, predictTest > 0.5)
Error in table(test$V7, predictTest > 0.5) :
object 'predictTest' not found
> # Acuratetea modelului
> (5+3)/nrow(test)
[1] 0.8
> # Acuratetea modelului de baza, care prezice ca oamenii cumpara online
> table(mydata$V7)/nrow(mydata)

1 2
0.7058824 0.2941176
> # Acuratetea pe multimea test, pe baza de AUC
> install.packages("ROCR")
Installing package into ‘C:/Users/student/Documents/R/win-library/3.5’
(as ‘lib’ is unspecified)
also installing the dependencies ‘gtools’, ‘gdata’, ‘gplots’

trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/gtools_3.8.1.zip'
Content type 'application/zip' length 325812 bytes (318 KB)
downloaded 318 KB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/gdata_2.18.0.zip'
Content type 'application/zip' length 1261081 bytes (1.2 MB)
downloaded 1.2 MB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/gplots_3.0.1.1.zip'
Content type 'application/zip' length 657063 bytes (641 KB)
downloaded 641 KB

trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/ROCR_1.0-7.zip'


Content type 'application/zip' length 201690 bytes (196 KB)
downloaded 196 KB

package ‘gtools’ successfully unpacked and MD5 sums checked


package ‘gdata’ successfully unpacked and MD5 sums checked
package ‘gplots’ successfully unpacked and MD5 sums checked
package ‘ROCR’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in


C:\Users\student\AppData\Local\Temp\RtmpUDOQyw\downloaded_packages
> library(ROCR)
Loading required package: gplots

Attaching package: ‘gplots’

The following object is masked from ‘package:stats’:

lowess

> # Functia de predictie


> ROCRpred = prediction(predictTest, test$V7)
Error in is.data.frame(predictions) : object 'predictTest' not found
> # Functia performanta
> ROCRperf = performance(ROCRpred, "tpr", "fpr")
Error in performance(ROCRpred, "tpr", "fpr") :
object 'ROCRpred' not found
> # Plot ROC curve
> plot(ROCRperf, colorize=TRUE, print.cutoffs.at=seq(0,1,by=0.1), text.adj=c(-0.2,1.7))
Error in plot(ROCRperf, colorize = TRUE, print.cutoffs.at = seq(0, 1, :
object 'ROCRperf' not found
> # Calculul valorii AUC
> ROCRpred = prediction(predictTest, test$V7)
Error in is.data.frame(predictions) : object 'predictTest' not found
> auc = as.numeric(performance(ROCRpred, "auc")@y.values)
Error in performance(ROCRpred, "auc") : object 'ROCRpred' not found
> auc # o acuratete buna a modelului logistic
Error: object 'auc' not found
> # Importarea setului de date si verificarea structurii acestuia
> mydata1 <-read.csv("mydata.csv", header = TRUE, sep = ",")
Error in file(file, "rt") : cannot open the connection
In addition: Warning message:
In file(file, "rt") :
cannot open file 'mydata.csv': No such file or directory
> mydata <- data.frame(mydata1)
Error in data.frame(mydata1) : object 'mydata1' not found
> str(mydata1)
Error in str(mydata1) : object 'mydata1' not found

You might also like