You are on page 1of 1

In [1]: # World Cup Data Analysis

import csv
import numpy as np
import pandas as pd
# Load the World Cup player table from players.csv into a DataFrame.
datafram = pd.read_csv("players.csv")
# Bare last expression: the notebook renders the first 10 rows as the cell output.
datafram.head(10)

Out[1]:
surname team position minutes shots passes tackles saves

0 Abdoun Algeria midfielder 16 0 6 0 0

1 Belhadj Algeria defender 270 1 146 8 0

2 Boudebouz Algeria midfielder 74 3 28 1 0

3 Bougherra Algeria defender 270 1 89 11 0

4 Chaouchi Algeria goalkeeper 90 0 17 0 2

5 Djebbour Algeria forward 123 3 19 1 0

6 Ghezzal Algeria forward 40 3 8 0 0

7 Guedioura Algeria midfielder 38 0 18 1 0

8 Halliche Algeria defender 270 2 94 4 0

9 Kadir Algeria midfielder 262 0 104 3 0

In [2]: # World Cup Data Visualization


# Create a scatterplot of players showing passes made (y-axis) versus minutes played (x-axis).
# Color each player based on their position (goalkeeper, defender, midfielder, forward).

import csv
import matplotlib.pyplot as plt
# Position keyword -> marker colour. Keywords are tried in this order with a
# substring match; a position matching none of them is drawn in brown.
position_colors = (
    ('goalkeeper', 'black'),
    ('defender', 'red'),
    ('midfielder', 'blue'),
    ('forward', 'grey'),
)

passesItems = []
minutesItems = []
colorsItems = []

# The with-block closes the file once every row has been read (the handle was
# previously left open); newline='' is what the csv module asks for.
with open('PlayersExt.csv', newline='') as File:
    rows = csv.DictReader(File)
    for data in rows:
        passesItems.append(float(data['passes']))
        minutesItems.append(float(data['minutes']))
        colorsItems.append(next(
            (color for keyword, color in position_colors
             if keyword in data['position']),
            'brown',
        ))

plt.xlabel('minutes')
plt.ylabel('passes')
plt.title('Passes made versus minutes played')
plt.scatter(minutesItems, passesItems, c=colorsItems, marker='o')
# Empty series that exist only to give the legend one entry per colour, so the
# figure can be read without looking at the code.
for keyword, color in position_colors:
    plt.scatter([], [], color=color, marker='o', label=keyword)
plt.legend(title='position')
plt.show()

In [3]: # Create a pie chart showing the relative percentage of teams with 0, 1, and 2 red cards.

import csv
import matplotlib.pyplot as plt
# Number of teams that received exactly 0, 1 and 2 red cards. Teams with any
# other total are not counted, because the chart compares only these groups.
Red_cards_0 = 0
Red_cards_1 = 0
Red_cards_2 = 0

# The with-block guarantees the file is closed after the tally.
with open('Teams.csv', newline='') as File:
    rows = csv.DictReader(File)
    for data in rows:
        red_cards = int(data['redCards'])  # parse once per row
        if red_cards == 0:
            Red_cards_0 += 1
        elif red_cards == 1:
            Red_cards_1 += 1
        elif red_cards == 2:
            Red_cards_2 += 1

colors = ['yellow', 'red', 'brown']
plt.pie(
    [Red_cards_0, Red_cards_1, Red_cards_2],
    labels=['redCards 0', 'redCards 1', 'redCards 2'],
    radius=1.8,
    colors=colors,
    autopct='%1.3f%%',  # percentage of teams, three decimal places
)
plt.show()

In [4]: # Titanic Data Visualization


# Create a bar chart showing the average fare paid by passengers in each class.
# The three bars should be labeled 'first', 'second', 'third'.

import csv
import matplotlib.pyplot as plt
# Passenger count and fare total per class.
First_class = 0
Second_class = 0
Third_class = 0

First_class_sum = 0.0
Second_class_sum = 0.0
Third_class_sum = 0.0

# The with-block closes the file once all passengers have been read.
with open('Titanic.csv', newline='') as File:
    row = csv.DictReader(File)
    for data in row:
        # Compare the class numerically for all three classes; the first
        # class used to be detected with a substring test instead.
        passenger_class = int(data['class'])
        if passenger_class == 1:
            First_class += 1
            First_class_sum += float(data['fare'])
        elif passenger_class == 2:
            Second_class += 1
            Second_class_sum += float(data['fare'])
        elif passenger_class == 3:
            Third_class += 1
            Third_class_sum += float(data['fare'])

# True division keeps the fractional part of each average (it used to be
# truncated with int()). A class with no passengers gets a bar of height 0
# instead of raising ZeroDivisionError.
heights = [
    total / count if count else 0.0
    for total, count in (
        (First_class_sum, First_class),
        (Second_class_sum, Second_class),
        (Third_class_sum, Third_class),
    )
]

# Labels spelled exactly as the task statement asks: 'first', 'second', 'third'.
bars = ['first', 'second', 'third']

plt.xlabel("Class")
plt.ylabel("Avg Fare")
plt.title("Avg Fare")
plt.bar(bars, heights, label="Avg Fare", color='lightgreen')
plt.legend()
plt.show()

In [5]: # World Cup with Pandas


# What player on a team with "ia" in the team name played less than 200 minutes and made more than 100 passes?
# Print the player surname

# NOTE: `surname` is the whole players table, not just the surname column.
surname = pd.read_csv("players.csv")

# Row filter: team name contains "ia", under 200 minutes, over 100 passes.
# regex=False makes "ia" a literal substring; na=False treats a missing team
# name as "no match" so the mask stays purely boolean.
matches = (
    surname['team'].str.contains('ia', regex=False, na=False)
    & (surname['minutes'] < 200)
    & (surname['passes'] > 100)
)

# Select the surname column of the matching rows. The column must not be
# AND-ed into the mask itself: it holds strings, not booleans.
answer = surname.loc[matches, 'surname']
print(answer)

431 Kuzmanovic
Name: surname, dtype: object

In [6]: # What player on a team with "ia" in the team name played less than 200 minutes and made more than 100 passes?

ds = pd.read_csv("players.csv")

# Boolean mask over the rows: team name contains the literal text "ia"
# (a missing team name counts as no match), fewer than 200 minutes played,
# more than 100 passes made.
wanted_rows = (
    ds['team'].str.contains('ia', regex=False, na=False)
    & (ds['minutes'] < 200)
    & (ds['passes'] > 100)
)

# Pick the surname column *after* filtering; putting the string column inside
# the `&` chain mixes text into a boolean expression.
an = ds.loc[wanted_rows, 'surname']
print(an)

431 Kuzmanovic
Name: surname, dtype: object

In [7]: # How many players on a team with ranking <10 played more than 350 minutes?

import csv
import numpy as np
# Count the players whose team ranking is below 10 and who played more than
# 350 minutes. The with-block closes the file as soon as the count is done.
with open('PlayersExt.csv', newline='') as info:
    rows = csv.DictReader(info)
    playersNumber = sum(
        1
        for rf in rows
        if int(rf['ranking']) < 10 and int(rf['minutes']) > 350
    )
print(playersNumber)

54

In [8]: # Which team has the highest ratio of goalsFor to goalsAgainst? Print the team only.

# Find the team(s) with the highest goalsFor / goalsAgainst ratio by actually
# computing the maximum, instead of testing against a hard-coded ratio of 7.
best_ratio = None
best_teams = []

with open('Teams.csv', newline='') as OpenFile:
    row = csv.DictReader(OpenFile)
    for j in row:
        goals_for = int(j['goalsFor'])
        goals_against = int(j['goalsAgainst'])
        if goals_against:
            ratio = goals_for / goals_against
        elif goals_for:
            # Scored but never conceded: treat as an unbeatable ratio rather
            # than raising ZeroDivisionError.
            ratio = float('inf')
        else:
            ratio = 0.0  # 0 goals for and 0 against

        if best_ratio is None or ratio > best_ratio:
            best_ratio = ratio
            best_teams = [j['team']]
        elif ratio == best_ratio:
            best_teams.append(j['team'])  # keep ties

# Print the team name only (one line per team if several are tied).
for team in best_teams:
    print(team)

Portugal

In [ ]: # Titanic Data Analysis


# Write a loop that asks the user to enter an age, then returns the number of married women over that age who
# embarked in Cherbourg. Terminate the loop when the user enters a number that is less than 0.

# Read the passenger rows once, up front; the file was previously reopened on
# every pass through the loop and never closed.
with open('Titanic.csv', newline='') as OpenFile:
    passengers = list(csv.DictReader(OpenFile))

Married_Women = 0
while True:
    Input_age = input('Enter Age: ').strip()
    try:
        # Compare ages as numbers. As strings, "9" > "25" is True and the
        # "< 0" stop test only worked by accident of character ordering.
        age_limit = float(Input_age)
    except ValueError:
        print('Please enter a number.')
        continue
    if age_limit < 0:
        break

    Married_Women = 0
    for i in passengers:
        if 'Mrs.' not in i['first'] or 'Cherbourg' not in i['embarked']:
            continue
        try:
            passenger_age = float(i['age'])
        except ValueError:
            # Age is blank or not numeric, so it cannot be compared - skip.
            continue
        if passenger_age > age_limit:
            Married_Women += 1

    # NOTE(review): the original loop body is cut off after the `if`; this
    # count-and-print step is reconstructed from the displayed output
    # ("14 married women 25") - confirm against the real notebook.
    print(Married_Women, 'married women', Input_age)

Enter Age: 25
14 married women 25
Enter Age: 39
10 married women 39
Enter Age: 55
2 married women 55
Enter Age: 65
0 married women 65
Enter Age: 42
9 married women 42

You might also like