World Cup Data Analysis and Visualization

In [1]: # World Cup Data Analysis
import csv
import numpy as np
import pandas as pd
# Read the per-player World Cup statistics into a DataFrame and preview the
# first ten rows as the cell's displayed output.
datafram = pd.read_csv(filepath_or_buffer="players.csv")
datafram.head(n=10)
Out[1]:
surname team position minutes shots passes tackles saves
0 Abdoun Algeria midfielder 16 0 6 0 0
1 Belhadj Algeria defender 270 1 146 8 0
2 Boudebouz Algeria midfielder 74 3 28 1 0
3 Bougherra Algeria defender 270 1 89 11 0
4 Chaouchi Algeria goalkeeper 90 0 17 0 2
5 Djebbour Algeria forward 123 3 19 1 0
6 Ghezzal Algeria forward 40 3 8 0 0
7 Guedioura Algeria midfielder 38 0 18 1 0
8 Halliche Algeria defender 270 2 94 4 0
9 Kadir Algeria midfielder 262 0 104 3 0
In [2]: # World Cup Data Visualization

# Create a scatterplot of players showing passes made (y-axis) versus minutes played (x-axis).
# Color each player based on their position (goalkeeper, defender, midfielder, forward).
import csv
import matplotlib.pyplot as plt
# Colour assigned to each position keyword. The pairs are checked in this
# order and the first keyword found inside the row's 'position' text wins,
# which mirrors a goalkeeper -> defender -> midfielder -> forward if/elif chain.
position_colors = [
    ('goalkeeper', 'black'),
    ('defender', 'red'),
    ('midfielder', 'blue'),
    ('forward', 'grey'),
]
fallback_color = 'brown'  # used when no position keyword matches

passesItems = []
minutesItems = []
colorsItems = []

# The context manager closes the file as soon as every row has been read;
# newline='' is the csv module's recommended way to open CSV files.
with open('PlayersExt.csv', newline='') as players_file:
    for data in csv.DictReader(players_file):
        passesItems.append(float(data['passes']))
        minutesItems.append(float(data['minutes']))
        colorsItems.append(next(
            (color for keyword, color in position_colors
             if keyword in data['position']),
            fallback_color,
        ))

# One point per player: minutes played on x, passes made on y.
plt.xlabel('minutes')
plt.ylabel('passes')
plt.scatter(minutesItems, passesItems, c=colorsItems, marker='o')
plt.show()
In [3]: # Create a pie chart showing the relative percentage of teams with 0, 1, and 2 red cards.
import csv
# Count how many teams received exactly 0, 1 and 2 red cards.
Red_cards_0 = 0
Red_cards_1 = 0
Red_cards_2 = 0

# The context manager closes the file once all rows have been consumed.
with open('Teams.csv', newline='') as teams_file:
    for data in csv.DictReader(teams_file):
        red_cards = int(data['redCards'])
        if red_cards == 0:
            Red_cards_0 += 1
        elif red_cards == 1:
            Red_cards_1 += 1
        elif red_cards == 2:
            Red_cards_2 += 1
        # Any other count is deliberately left out: the chart only has
        # slices for 0, 1 and 2 red cards.

# One slice per red-card count, with the share of teams printed on the slice.
colors = ['yellow', 'red', 'brown']
plt.pie(
    [Red_cards_0, Red_cards_1, Red_cards_2],
    labels=['redCards 0', 'redCards 1', 'redCards 2'],
    radius=1.8,
    colors=colors,
    autopct='%1.3f%%',
)
plt.show()
In [4]: # Titanic Data Visualization

# Create a bar chart showing the average fare paid by passengers in each class.
# The three bars should be labeled 'first', 'second', 'third'.
import csv
def _average(total, count):
    """Return ``total / count``, or 0.0 when ``count`` is zero."""
    return total / count if count else 0.0


# Passenger counts and fare totals, one pair per travel class.
First_class = 0
Second_class = 0
Third_class = 0
First_class_sum = 0.0
Second_class_sum = 0.0
Third_class_sum = 0.0

# The context manager closes the file once all rows have been consumed.
with open('Titanic.csv', newline='') as titanic_file:
    for data in csv.DictReader(titanic_file):
        # Parse the class once and compare numerically for every class,
        # rather than mixing a substring test with integer comparisons.
        passenger_class = int(data['class'])
        if passenger_class == 1:
            First_class += 1
            First_class_sum += float(data['fare'])
        elif passenger_class == 2:
            Second_class += 1
            Second_class_sum += float(data['fare'])
        elif passenger_class == 3:
            Third_class += 1
            Third_class_sum += float(data['fare'])

# Keep the exact averages (no integer truncation) and make sure all three
# heights are present so they line up with the three bar labels.
heights_1 = _average(First_class_sum, First_class)
heights_2 = _average(Second_class_sum, Second_class)
heights_3 = _average(Third_class_sum, Third_class)
heights = [heights_1, heights_2, heights_3]

# Labels follow the wording required by the task statement for this cell.
bars = ['first', 'second', 'third']

plt.xlabel("Class")
plt.ylabel("Avg Fare")
plt.title("Avg Fare")
plt.bar(bars, heights, label="Avg Fare", color='lightgreen')
plt.legend()
plt.show()
In [5]: # World Cup with Pandas

# What player on a team with "ia" in the team name played less than 200 minutes and made more than 100 passes?
# Print the player surname
# NOTE: `surname` is the whole players DataFrame, not just the surname column.
surname = pd.read_csv("players.csv")

# Row filter: team name contains "ia", fewer than 200 minutes played and more
# than 100 passes made. na=False makes rows with a missing team count as
# "no match" instead of putting NaN into the boolean mask.
matching_rows = (
    surname['team'].str.contains('ia', na=False)
    & (surname['minutes'] < 200)
    & (surname['passes'] > 100)
)

# Select the surname column of the matching rows. The column is a selection,
# not another condition, so it must not be AND-ed into the mask.
answer = surname.loc[matching_rows, 'surname']
print(answer)
431 Kuzmanovic
Name: surname, dtype: object
In [6]: # What player on a team with "ia" in the team name played less than 200 minutes and made more than 100 passes?
# Load the per-player statistics.
ds = pd.read_csv("players.csv")

# Row filter: team name contains "ia", fewer than 200 minutes played and more
# than 100 passes made. na=False makes rows with a missing team count as
# "no match" instead of putting NaN into the boolean mask.
row_filter = (
    ds['team'].str.contains('ia', na=False)
    & (ds['minutes'] < 200)
    & (ds['passes'] > 100)
)

# Select the surname column of the matching rows. The column is a selection,
# not another condition, so it must not be AND-ed into the mask.
an = ds.loc[row_filter, 'surname']
print(an)
431 Kuzmanovic
Name: surname, dtype: object
In [7]: # How many players on a team with ranking <10 played more than 350 minutes?
import csv
import numpy as np
# Count the players whose team ranking is below 10 and who played more than
# 350 minutes. The context manager closes the file once the rows are consumed.
with open('PlayersExt.csv', newline='') as info:
    playersNumber = sum(
        1
        for rf in csv.DictReader(info)
        if int(rf['ranking']) < 10 and int(rf['minutes']) > 350
    )
print(playersNumber)
54
In [8]: # Which team has the highest ratio of goalsFor to goalsAgainst? Print the team only.
# Find the team(s) with the highest goalsFor / goalsAgainst ratio by actually
# computing the maximum instead of comparing against a hand-picked value.
best_ratio = None
best_teams = []

# The context manager closes the file once all rows have been consumed.
with open('Teams.csv', newline='') as OpenFile:
    for j in csv.DictReader(OpenFile):
        goals_for = int(j['goalsFor'])
        goals_against = int(j['goalsAgainst'])

        if goals_against:
            ratio = goals_for / goals_against
        elif goals_for:
            # Scored but never conceded: treat the ratio as unbounded rather
            # than raising ZeroDivisionError.
            ratio = float('inf')
        else:
            # 0 / 0 is undefined; such a team cannot be ranked.
            continue

        if best_ratio is None or ratio > best_ratio:
            best_ratio = ratio
            best_teams = [j['team']]
        elif ratio == best_ratio:
            best_teams.append(j['team'])

# Print the team name only; tied teams are each printed on their own line.
for team in best_teams:
    print(team)
Portugal
In [ ]: # Titanic Data Analysis

# Write a loop that asks the user to enter an age, then returns the number of married women over that age who
# embarked in Cherbourg. Terminate the loop when the user enters a number that is less than 0.
# Read the passenger rows once, up front. The data does not change between
# prompts, so re-opening the file on every iteration is unnecessary, and the
# context manager guarantees the file is closed.
with open('Titanic.csv', newline='') as OpenFile:
    passengers = list(csv.DictReader(OpenFile))

Married_Women = 0
while True:
    Input_age = input('Enter Age: ')

    # Compare ages as numbers. As strings, "9" > "25" is true and the
    # negative-number sentinel only works by accident of character ordering.
    try:
        age_limit = float(Input_age)
    except ValueError:
        print('Please enter a number.')
        continue

    # A negative age ends the loop.
    if age_limit < 0:
        break

    Married_Women = 0
    for i in passengers:
        # Married women are identified by 'Mrs.' in the 'first' column.
        if 'Mrs.' not in i['first'] or 'Cherbourg' not in i['embarked']:
            continue
        try:
            passenger_age = float(i['age'])
        except (TypeError, ValueError):
            # Missing or non-numeric age: the passenger cannot be counted as
            # being over the requested age.
            continue
        if passenger_age > age_limit:
            Married_Women += 1

    # Report in the "<count> married women <age>" form, echoing the age
    # exactly as the user typed it.
    print(Married_Women, 'married women', Input_age)
Enter Age: 25
14 married women 25
Enter Age: 39
10 married women 39
Enter Age: 55
2 married women 55
Enter Age: 65
0 married women 65
Enter Age: 42
9 married women 42

World Cup Data Analysis and Visualization

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

World Cup Data Analysis and Visualization

Uploaded by

Copyright:

Available Formats

In [1]: # World Cup Data Analysis

0 Abdoun Algeria midfielder 16 0 6 0 0

1 Belhadj Algeria defender 270 1 146 8 0

2 Boudebouz Algeria midfielder 74 3 28 1 0

3 Bougherra Algeria defender 270 1 89 11 0

4 Chaouchi Algeria goalkeeper 90 0 17 0 2

5 Djebbour Algeria forward 123 3 19 1 0

6 Ghezzal Algeria forward 40 3 8 0 0

7 Guedioura Algeria midfielder 38 0 18 1 0

8 Halliche Algeria defender 270 2 94 4 0

9 Kadir Algeria midfielder 262 0 104 3 0

In [2]: # World Cup Data Visualization

In [4]: # Titanic Data Visualization

for data in row:

bars = ['First', 'Second', 'Third']

In [5]: # World Cup with Pandas

In [ ]: # Titanic Data Analysis

You might also like