# Load the training data (semicolon-separated, decimal-comma CSV) and use the
# IDENT column as row names, then display the structure of the result.
HDV <- read.csv2(
  file = "HDV_MLIN.csv",
  row.names = "IDENT"
)
str(HDV)
## 'data.frame': 4000 obs. of 7 variables:
## $ LIRE : int 1 1 1 1 0 0 1 1 1 1 ...
## $ ORDI : int 0 0 0 0 0 1 1 1 0 0 ...
## $ SPORT : int 1 1 1 0 0 1 0 1 0 1 ...
## $ GENRE : int 2 1 1 1 1 1 1 2 2 2 ...
## $ AGE : int 60 50 82 58 53 39 22 37 26 25 ...
## $ NIV_ETUDE: int 40 40 15 0 17 0 0 40 15 40 ...
## $ HeuresTV : int 2 30 1 3 3 3 0 1 1 0 ...
# Logistic regression: model the binary response ORDI using every other
# column of HDV as a predictor, then print the fitted model summary.
res3 <- glm(
  formula = ORDI ~ .,
  family = binomial(),
  data = HDV
)
summary(res3)
##
## Call:
## glm(formula = ORDI ~ ., family = binomial(), data = HDV)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3200 -0.7886 -0.3783 0.8579 2.9900
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.650752 0.205343 8.039 9.06e-16 ***
## LIRE 0.834129 0.087557 9.527 < 2e-16 ***
## SPORT 0.533461 0.081861 6.517 7.19e-11 ***
## GENRE -0.609796 0.080021 -7.620 2.53e-14 ***
## AGE -0.064127 0.002890 -22.191 < 2e-16 ***
## NIV_ETUDE 0.037375 0.003187 11.726 < 2e-16 ***
## HeuresTV 0.051683 0.020913 2.471 0.0135 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 5233.1 on 3999 degrees of freedom
## Residual deviance: 3962.7 on 3993 degrees of freedom
## AIC: 3976.7
##
## Number of Fisher Scoring iterations: 5
# Every positive coefficient increases the probability that ORDI = 1 (in other
# words, a variable raises that probability only if its coefficient is positive).
# The determining (significant) variables are: LIRE, SPORT, GENRE, AGE,
# NIV_ETUDE, HeuresTV.
# Confusion matrix on the training data: rows are the observed ORDI values,
# columns are the fitted probabilities rounded to 0 or 1.
table(
  HDV$ORDI,
  round(predict(res3, newdata = HDV, type = "response"))
)
##
## 0 1
## 0 2124 431
## 1 606 839
# Error rate (taux) on the training data: share of observations whose rounded
# predicted probability differs from the observed ORDI value.
# Computed from the model itself rather than from counts copied out of the
# printed confusion matrix, so it stays correct if the data or model changes.
# na.rm = TRUE mirrors table(), which drops NA pairs by default.
taux <- mean(
  round(predict(res3, newdata = HDV, type = "response")) != HDV$ORDI,
  na.rm = TRUE
)
taux
## [1] 0.25925
# Load the test data from hdv_test.csv, using the IDENT column as row names.
HDVTest <- read.csv2("hdv_test.csv", row.names = "IDENT")

# Confusion matrix on the test data: rows are the observed ORDI values,
# columns are the predictions of res3 rounded to 0 or 1.
table(
  HDVTest$ORDI,
  round(predict(res3, newdata = HDVTest, type = "response"))
)
##
## 0 1
## 0 1962 367
## 1 470 680
print("calcul du taux")
## [1] "calcul du taux"
# Error rate on the test data: share of observations whose rounded predicted
# probability differs from the observed ORDI value. Computed from the
# predictions instead of hard-coded counts taken from the printed table.
# na.rm = TRUE mirrors table(), which drops NA pairs by default.
taux1 <- mean(
  round(predict(res3, newdata = HDVTest, type = "response")) != HDVTest$ORDI,
  na.rm = TRUE
)
taux1
## [1] 0.2405864
# Data frame of two hypothetical individuals that are identical on every
# predictor except GENRE (1 for the first row, 2 for the second).
# Columns are named at construction so they match the predictors used by res3.
donnee <- data.frame(
  LIRE = c(0, 0),
  SPORT = c(0, 0),
  GENRE = c(1, 2),
  AGE = c(30, 30),
  NIV_ETUDE = c(30, 30),
  HeuresTV = c(0, 0)
)
donnee

# Predicted probability that ORDI = 1 for each of the two rows.
predict(res3, newdata = donnee, type = "response")
## 1 2
## 0.5593205 0.4082051