You are on page 1 of 9

Correction TP 11 Statistiques

May 8, 2023

#
TP 11 Statistiques : Etude de cas
[2]: # 1 :
import pandas as pd
import matplotlib.pyplot as plt

[3]: # 2 :
Data=pd.read_csv("StudentData.csv")

[4]: # 3 :
Data.head()

[4]: gender race/ethnicity parental level of education lunch \


0 female group B bachelor's degree standard
1 female group C some college standard
2 female group B master's degree standard
3 male group A associate's degree free/reduced
4 male group C some college standard

test preparation course math score reading score writing score


0 none 72 72 74
1 completed 69 90 88
2 none 90 95 93
3 none 47 57 44
4 none 76 78 75

[5]: Data.tail()

[5]: gender race/ethnicity parental level of education lunch \


995 female group E master's degree standard
996 male group C high school free/reduced
997 female group C high school free/reduced
998 female group D some college standard
999 female group D some college free/reduced

test preparation course math score reading score writing score

1
995 completed 88 99 95
996 none 62 55 55
997 completed 59 71 65
998 completed 68 78 77
999 none 77 86 86

[6]: Data.shape

[6]: (1000, 8)

[7]: # 4 :
Data.describe()

[7]: math score reading score writing score


count 1000.00000 1000.000000 1000.000000
mean 66.08900 69.169000 68.054000
std 15.16308 14.600192 15.195657
min 0.00000 17.000000 10.000000
25% 57.00000 59.000000 57.750000
50% 66.00000 70.000000 69.000000
75% 77.00000 79.000000 79.000000
max 100.00000 100.000000 100.000000

[9]: # 5 :
plt.hist(Data["gender"])

[9]: (array([518., 0., 0., 0., 0., 0., 0., 0., 0., 482.]),
array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]),
<a list of 10 Patch objects>)

2
[10]: plt.hist(Data["math score"])

[10]: (array([ 2., 2., 10., 26., 95., 188., 268., 216., 135., 58.]),
array([ 0., 10., 20., 30., 40., 50., 60., 70., 80., 90., 100.]),
<a list of 10 Patch objects>)

3
[11]: plt.hist(Data["lunch"])

[11]: (array([645., 0., 0., 0., 0., 0., 0., 0., 0., 355.]),
array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]),
<a list of 10 Patch objects>)

[12]: # 6 :
# Pull the three score columns out of the DataFrame so each subject can be
# analysed on its own.
# NOTE: `math` here is the "math score" column, not the standard-library
# `math` module.
math = Data["math score"]
reading = Data["reading score"]
writing = Data["writing score"]

[14]: # 7 :
# Most frequent score per subject. mode() returns a Series rather than a
# scalar, which is why each print shows an index and a dtype line.
print(math.mode())
print(reading.mode())
print(writing.mode())

0 65
dtype: int64
0 72
dtype: int64
0 74
dtype: int64

4
[15]: print(math.median())
print(reading.median())
print(writing.median())

66.0
70.0
69.0

[18]: print(math.quantile(1/4))
print(reading.quantile(1/4))
print(writing.quantile(1/4))
print(math.quantile(1/2))
print(reading.quantile(1/2))
print(writing.quantile(1/2))
print(math.quantile(3/4))
print(reading.quantile(3/4))
print(writing.quantile(3/4))

57.0
59.0
57.75
66.0
70.0
69.0
77.0
79.0
79.0

[22]: # 8 :
# Range of the math scores: largest score minus smallest score.
etendue = max(math) - min(math)
print(etendue)
# Interquartile range of the math scores: third quartile minus first quartile.
IQ = math.quantile(3/4) - math.quantile(1/4)
print(IQ)

100
20.0

[23]: # 9 :
plt.boxplot(math)

[23]: {'whiskers': [<matplotlib.lines.Line2D at 0x18091b4eca0>,


<matplotlib.lines.Line2D at 0x18091db7040>],
'caps': [<matplotlib.lines.Line2D at 0x18091db73a0>,
<matplotlib.lines.Line2D at 0x18091db7700>],
'boxes': [<matplotlib.lines.Line2D at 0x18091b4e940>],
'medians': [<matplotlib.lines.Line2D at 0x18091db7a60>],
'fliers': [<matplotlib.lines.Line2D at 0x18091db7d60>],
'means': []}

5
[24]: plt.boxplot(writing)

[24]: {'whiskers': [<matplotlib.lines.Line2D at 0x18091ac10a0>,


<matplotlib.lines.Line2D at 0x18091ac1400>],
'caps': [<matplotlib.lines.Line2D at 0x18091ac1760>,
<matplotlib.lines.Line2D at 0x18091ac1ac0>],
'boxes': [<matplotlib.lines.Line2D at 0x18091abdd00>],
'medians': [<matplotlib.lines.Line2D at 0x18091ac1e20>],
'fliers': [<matplotlib.lines.Line2D at 0x18091b77160>],
'means': []}

6
[26]: plt.boxplot(reading)

[26]: {'whiskers': [<matplotlib.lines.Line2D at 0x18091c0f1f0>,


<matplotlib.lines.Line2D at 0x18091c0f550>],
'caps': [<matplotlib.lines.Line2D at 0x18091c0f8b0>,
<matplotlib.lines.Line2D at 0x18091c0fc10>],
'boxes': [<matplotlib.lines.Line2D at 0x18091bf4e50>],
'medians': [<matplotlib.lines.Line2D at 0x18091c0ff70>],
'fliers': [<matplotlib.lines.Line2D at 0x18091c992b0>],
'means': []}

7
[27]: # 10 :
def effectif(I, P):
    """Count the values of P that lie in the half-open interval [I[0], I[1]).

    Args:
        I: Sequence whose first two items are the lower and upper bounds.
            The lower bound is included, the upper bound is excluded.
        P: Iterable of values comparable with the bounds.

    Returns:
        The number of values x in P such that I[0] <= x < I[1].
    """
    # The upper bound is excluded so that adjacent intervals such as
    # [20, 30) and [30, 40) never count the same value twice.
    return sum(1 for x in P if I[0] <= x < I[1])

[29]: effectif([20,30],math)

[29]: 10

[30]: effectif([30,40],math)

[30]: 26

[31]: #11 :
def moyenne(S):
    """Return the arithmetic mean of the values in S.

    S is traversed exactly once and its length is counted during the
    traversal, so any iterable is accepted, including one-shot iterators.

    Args:
        S: Iterable of numbers.

    Returns:
        The sum of the values divided by how many there are.

    Raises:
        ZeroDivisionError: If S yields no values.
    """
    somme = 0
    nb = 0
    for x in S:
        somme += x
        nb += 1
    return somme / nb

[32]: moyenne(math)

8
[32]: 66.089

[33]: moyenne(reading)

[33]: 69.169

[34]: moyenne(writing)

[34]: 68.054

[42]: # 12 :
# Method 1: mean of the squares minus the square of the mean.
def variance_1(S):
    """Return the population variance of S as mean(x**2) - mean(x)**2.

    The sum, the sum of squares and the count are accumulated in one
    traversal, so S may be any iterable, including a one-shot iterator.
    The result divides by the number of values (not by that number minus 1).

    Args:
        S: Iterable of numbers.

    Returns:
        The mean of the squared values minus the squared mean.

    Raises:
        ZeroDivisionError: If S yields no values.
    """
    somme = 0
    somme_carres = 0
    nb = 0
    for x in S:
        somme += x
        somme_carres += x**2
        nb += 1
    # Subtracting two close quantities loses a few digits of precision, so
    # the last decimals may differ from the centred formula.
    return somme_carres / nb - (somme / nb)**2

# Method 2: mean of the squared deviations from the mean.
def variance_2(S):
    """Return the population variance of S as the mean squared deviation.

    The values are copied into a list first because two traversals are
    needed (one for the mean, one for the deviations); this also makes
    one-shot iterators usable. The result divides by the number of values
    (not by that number minus 1).

    Args:
        S: Iterable of numbers.

    Returns:
        The mean of (x - mean)**2 over the values of S.

    Raises:
        ZeroDivisionError: If S yields no values.
    """
    valeurs = list(S)
    nb = len(valeurs)

    # Compute the mean once, outside the loop below; recomputing it for
    # every value would make the function quadratic in the size of S.
    total = 0
    for x in valeurs:
        total += x
    m = total / nb

    somme = 0
    for x in valeurs:
        somme += (x - m)**2
    return somme / nb

[43]: variance_1(math)

[43]: 229.68907899999977

[44]: variance_2(math)

[44]: 229.68907900000048

[45]: from math import sqrt

[46]: ecart_type = sqrt(variance_1(math))


print(ecart_type)

15.155496659628142

[ ]:

You might also like