You are on page 1 of 8

11/26/23, 11:39 PM GNN-Football - Colaboratory

1 !pip install basemap

output
Requirement already satisfied: basemap in /usr/local/lib/python3.10/dist-packages (1.3.8)
Requirement already satisfied: basemap-data<1.4,>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from basemap) (1.3.2)
Requirement already satisfied: pyshp<2.4,>=1.2 in /usr/local/lib/python3.10/dist-packages (from basemap) (2.3.1)
Requirement already satisfied: matplotlib<3.8,>=1.5 in /usr/local/lib/python3.10/dist-packages (from basemap) (3.7.1)
Requirement already satisfied: pyproj<3.7.0,>=1.9.3 in /usr/local/lib/python3.10/dist-packages (from basemap) (3.6.0)
Requirement already satisfied: numpy<1.26,>=1.21 in /usr/local/lib/python3.10/dist-packages (from basemap) (1.23.5)
Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib<3.8,>=1.5->basem
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib<3.8,>=1.5->basemap)
Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib<3.8,>=1.5->base
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib<3.8,>=1.5->base
Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib<3.8,>=1.5->basema
Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib<3.8,>=1.5->basemap
Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib<3.8,>=1.5->basem
Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib<3.8,>=1.5->b
Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from pyproj<3.7.0,>=1.9.3->basemap) (2023
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib<3

1 from sklearn.model_selection import train_test_split


2 from mpl_toolkits.basemap import Basemap
3 from difflib import SequenceMatcher
4 import matplotlib.pyplot as plt
5 import networkx as nx
6 import urllib.request
7 import pandas as pd
8 import numpy as np
9 import zipfile
10 import io

DATA

# Download the football network archive and parse it into a networkx graph.
url = "http://www-personal.umich.edu/~mejn/netdata/football.zip"

# Read the whole archive into an in-memory "file"; the `with` block closes
# the socket even if the read fails.
with urllib.request.urlopen(url) as sock:
    s = io.BytesIO(sock.read())

with zipfile.ZipFile(s) as zf:
    txt = zf.read("football.txt").decode()  # read info file
    gml = zf.read("football.gml").decode()  # read gml data

# throw away bogus first line with # from mejn files
gml = gml.split("\n")[1:]
G = nx.parse_gml(gml)  # parse gml data
print(txt)

The file football.gml contains the network of American football games
between Division IA colleges during regular season Fall 2000, as compiled
by M. Girvan and M. Newman. The nodes have values that indicate to which
conferences they belong. The values are as follows:

0 = Atlantic Coast
1 = Big East
2 = Big Ten
3 = Big Twelve
4 = Conference USA
5 = Independents
6 = Mid-American
7 = Mountain West
8 = Pacific Ten
9 = Southeastern
10 = Sun Belt
11 = Western Athletic

If you make use of these data, please cite M. Girvan and M. E. J. Newman,
Community structure in social and biological networks,
Proc. Natl. Acad. Sci. USA 99, 7821-7826 (2002).

Correction: Two edges were erroneously duplicated in this data set, and
have been removed (21 SEP 2014)

https://colab.research.google.com/drive/1Z7IzlIFd0cP9t3wwwJz5g7IzP2mHBr4o#printMode=true 1/8
11/26/23, 11:39 PM GNN-Football - Colaboratory
# One colour per conference id (the node attribute 'value', 0-11).
cmap = {
    0: '#bd2309', 1: '#bbb12d', 2: '#1480fa', 3: '#14fa2f', 4: '#faf214',
    5: '#2edfea', 6: '#ea2ec4', 7: '#ea2e40', 8: '#577a4d', 9: '#2e46c0',
    10: '#f59422', 11: '#8086d9',
}

# Colour every node by its conference, in the graph's node order.
colors = [cmap[G.nodes[n]['value']] for n in G.nodes()]
# Fixed seed so the layout is reproducible between runs.
pos = nx.spring_layout(G, seed=1987)

plt.figure(figsize=(16, 6))
nx.draw_networkx_edges(G, pos, alpha=0.2)
nx.draw_networkx_nodes(
    G, pos, nodelist=G.nodes(), node_color=colors, node_size=100
)
plt.axis('off')
plt.show()

Extract Node & Edge Dataframes

# Node table: one row per school, indexed by school name, with the single
# node attribute renamed to 'conference'.
node_df = pd.DataFrame.from_dict(
    dict(G.nodes(data=True)), orient='index'
).rename_axis('school')
node_df.columns = ['conference']

# Edge table: one row per game with 'source' and 'target' school names.
edge_df = nx.to_pandas_edgelist(G)
edge_df

source target

0 BrighamYoung FloridaState

1 BrighamYoung NewMexico

2 BrighamYoung SanDiegoState

3 BrighamYoung Wyoming

4 BrighamYoung Utah

... ... ...

608 Clemson Maryland

609 NevadaLasVegas Hawaii

610 WakeForest Maryland

611 OregonState California

612 TexasChristian Hawaii

613 rows × 2 columns

Add Node Features


Add latitude/longitude coordinates and each school's previous-year rank, wins, and conference wins.

https://colab.research.google.com/drive/1Z7IzlIFd0cP9t3wwwJz5g7IzP2mHBr4o#printMode=true 2/8
11/26/23, 11:39 PM GNN-Football - Colaboratory
# Hand-entered per-school features, keyed by the school names used as graph
# node ids: coordinates plus previous-year rank, wins and conference wins.
node_data = {
    'AirForce': {'Latitude': 38.996093, 'Longitude': -104.857222, 'Rank': 83, 'Wins': 6, 'Conf_wins': 2},
    'Akron': {'Latitude': 41.075346, 'Longitude': -81.512184, 'Rank': 66, 'Wins': 7, 'Conf_wins': 5},
    'Alabama': {'Latitude': 33.215775, 'Longitude': -87.538261, 'Rank': 101, 'Wins': 10, 'Conf_wins': 7},
    'AlabamaBirmingham': {'Latitude': 33.501569, 'Longitude': -86.805677, 'Rank': 52, 'Wins': 5, 'Conf_wins': 4},
    'Arizona': {'Latitude': 32.248814, 'Longitude': -110.987419, 'Rank': 90, 'Wins': 6, 'Conf_wins': 3},
    'ArizonaState': {'Latitude': 33.424564, 'Longitude': -111.928001, 'Rank': 88, 'Wins': 6, 'Conf_wins': 5},
    'Arkansas': {'Latitude': 36.068213, 'Longitude': -94.1748, 'Rank': 103, 'Wins': 8, 'Conf_wins': 4},
    'ArkansasState': {'Latitude': 35.842722, 'Longitude': -90.674584, 'Rank': 45, 'Wins': 4, 'Conf_wins': 2},
    # NOTE(review): this longitude looks far too far west for West Point, NY
    # -- verify the Army coordinates before relying on distance features.
    'Army': {'Latitude': 39.343777, 'Longitude': -94.915748, 'Rank': 54, 'Wins': 3, 'Conf_wins': 1},
    'Auburn': {'Latitude': 32.593357, 'Longitude': -85.495163, 'Rank': 105, 'Wins': 5, 'Conf_wins': 2},
    'BallState': {'Latitude': 40.207953, 'Longitude': -85.413456, 'Rank': 76, 'Wins': 0, 'Conf_wins': 0},
    'Baylor': {'Latitude': 31.549612, 'Longitude': -97.114885, 'Rank': 21, 'Wins': 1, 'Conf_wins': 0},
    'BoiseState': {'Latitude': 43.6036, 'Longitude': -116.20871, 'Rank': 41, 'Wins': 10, 'Conf_wins': 5},
    'BostonCollege': {'Latitude': 42.350876, 'Longitude': -71.106918, 'Rank': 24, 'Wins': 8, 'Conf_wins': 4},
    'BowlingGreenState': {'Latitude': 41.379698, 'Longitude': -83.630737, 'Rank': 68, 'Wins': 5, 'Conf_wins': 3},
    'BrighamYoung': {'Latitude': 40.251475, 'Longitude': -111.649241, 'Rank': 78, 'Wins': 8, 'Conf_wins': 5},
    'Buffalo': {'Latitude': 43.000291, 'Longitude': -78.789165, 'Rank': 70, 'Wins': 0, 'Conf_wins': 0},
    'California': {'Latitude': 37.871853, 'Longitude': -122.258423, 'Rank': 92, 'Wins': 4, 'Conf_wins': 3},
    'CentralFlorida': {'Latitude': 28.602378, 'Longitude': -81.20025, 'Rank': 61, 'Wins': 4, 'Conf_wins': 0},
    'CentralMichigan': {'Latitude': 43.590972, 'Longitude': -84.776213, 'Rank': 75, 'Wins': 4, 'Conf_wins': 3},
    'Cincinnati': {'Latitude': 39.132827, 'Longitude': -84.514935, 'Rank': 56, 'Wins': 3, 'Conf_wins': 0},
    'Clemson': {'Latitude': 34.673227, 'Longitude': -82.836745, 'Rank': 4, 'Wins': 6, 'Conf_wins': 5},
    'Colorado': {'Latitude': 40.006638, 'Longitude': -105.267476, 'Rank': 12, 'Wins': 7, 'Conf_wins': 5},
    'ColoradoState': {'Latitude': 40.573181, 'Longitude': -105.086386, 'Rank': 79, 'Wins': 8, 'Conf_wins': 5},
    'Connecticut': {'Latitude': 41.807281, 'Longitude': -72.254646, 'Rank': 115, 'Wins': 4, 'Conf_wins': 3},
    'Duke': {'Latitude': 36.001465, 'Longitude': -78.939133, 'Rank': 7, 'Wins': 3, 'Conf_wins': 3},
    'EastCarolina': {'Latitude': 35.60684, 'Longitude': -77.366564, 'Rank': 49, 'Wins': 9, 'Conf_wins': 4},
    'EasternMichigan': {'Latitude': 42.250631, 'Longitude': -83.624152, 'Rank': 74, 'Wins': 4, 'Conf_wins': 4},
    'Florida': {'Latitude': 29.643946, 'Longitude': -82.355659, 'Rank': 95, 'Wins': 9, 'Conf_wins': 7},
    'FloridaState': {'Latitude': 30.441805, 'Longitude': -84.298521, 'Rank': 1, 'Wins': 12, 'Conf_wins': 8},
    'FresnoState': {'Latitude': 36.811754, 'Longitude': -119.748511, 'Rank': 109, 'Wins': 8, 'Conf_wins': 5},
    'Georgia': {'Latitude': 33.948006, 'Longitude': -83.377319, 'Rank': 97, 'Wins': 8, 'Conf_wins': 5},
    'GeorgiaTech': {'Latitude': 33.775529, 'Longitude': -84.396311, 'Rank': 2, 'Wins': 8, 'Conf_wins': 5},
    'Hawaii': {'Latitude': 19.699575, 'Longitude': -155.080872, 'Rank': 107, 'Wins': 9, 'Conf_wins': 5},
    'Houston': {'Latitude': 29.766083, 'Longitude': -95.35881, 'Rank': 53, 'Wins': 7, 'Conf_wins': 3},
    'Idaho': {'Latitude': 46.728709, 'Longitude': -117.012675, 'Rank': 42, 'Wins': 7, 'Conf_wins': 4},
    'Illinois': {'Latitude': 40.110558, 'Longitude': -88.228333, 'Rank': 35, 'Wins': 8, 'Conf_wins': 4},
    'Indiana': {'Latitude': 39.16806, 'Longitude': -86.522485, 'Rank': 38, 'Wins': 4, 'Conf_wins': 3},
    'Iowa': {'Latitude': 41.662607, 'Longitude': -91.55496, 'Rank': 40, 'Wins': 1, 'Conf_wins': 0},
    'IowaState': {'Latitude': 42.026511, 'Longitude': -93.646396, 'Rank': 14, 'Wins': 4, 'Conf_wins': 1},
    'Kansas': {'Latitude': 38.954343, 'Longitude': -95.255805, 'Rank': 13, 'Wins': 5, 'Conf_wins': 3},
    'KansasState': {'Latitude': 39.197221, 'Longitude': -96.585268, 'Rank': 11, 'Wins': 11, 'Conf_wins': 7},
    'Kent': {'Latitude': 41.149219, 'Longitude': -81.344644, 'Rank': 69, 'Wins': 2, 'Conf_wins': 2},
    'Kentucky': {'Latitude': 38.030602, 'Longitude': -84.50401, 'Rank': 98, 'Wins': 6, 'Conf_wins': 4},
    'LouisianaLafayette': {'Latitude': 30.211892, 'Longitude': -92.019939, 'Rank': 63, 'Wins': 2, 'Conf_wins': 0},
    'LouisianaMonroe': {'Latitude': 32.531076, 'Longitude': -92.067332, 'Rank': 58, 'Wins': 5, 'Conf_wins': 0},
    'LouisianaState': {'Latitude': 30.412331, 'Longitude': -91.183701, 'Rank': 106, 'Wins': 3, 'Conf_wins': 1},
    'LouisianaTech': {'Latitude': 32.530628, 'Longitude': -92.652078, 'Rank': 57, 'Wins': 8, 'Conf_wins': 0},
    'Louisville': {'Latitude': 38.21224, 'Longitude': -85.7589, 'Rank': 50, 'Wins': 7, 'Conf_wins': 4},
    'Marshall': {'Latitude': 38.42339, 'Longitude': -82.425003, 'Rank': 64, 'Wins': 13, 'Conf_wins': 8},
    'Maryland': {'Latitude': 38.987227, 'Longitude': -76.94249, 'Rank': 8, 'Wins': 5, 'Conf_wins': 2},
    'Memphis': {'Latitude': 35.118619, 'Longitude': -89.937385, 'Rank': 51, 'Wins': 5, 'Conf_wins': 4},
    'MiamiFlorida': {'Latitude': 25.717512, 'Longitude': -80.277861, 'Rank': 23, 'Wins': 9, 'Conf_wins': 6},
    'MiamiOhio': {'Latitude': 39.50831, 'Longitude': -84.734878, 'Rank': 65, 'Wins': 7, 'Conf_wins': 6},
    'Michigan': {'Latitude': 42.278046, 'Longitude': -83.73822, 'Rank': 31, 'Wins': 10, 'Conf_wins': 6},
    'MichiganState': {'Latitude': 42.701329, 'Longitude': -84.481682, 'Rank': 32, 'Wins': 10, 'Conf_wins': 6},
    'MiddleTennesseeState': {'Latitude': 35.848536, 'Longitude': -86.367036, 'Rank': 62, 'Wins': 3, 'Conf_wins': 0},
    'Minnesota': {'Latitude': 44.973829, 'Longitude': -93.227746, 'Rank': 34, 'Wins': 8, 'Conf_wins': 5},
    'Mississippi': {'Latitude': 34.364639, 'Longitude': -89.538196, 'Rank': 104, 'Wins': 8, 'Conf_wins': 4},
    'MississippiState': {'Latitude': 33.455108, 'Longitude': -88.794282, 'Rank': 102, 'Wins': 10, 'Conf_wins': 6},
    'Missouri': {'Latitude': 38.940289, 'Longitude': -92.32778, 'Rank': 15, 'Wins': 4, 'Conf_wins': 1},
    'Navy': {'Latitude': 38.980584, 'Longitude': -76.483799, 'Rank': 59, 'Wins': 5, 'Conf_wins': 0},
    'Nebraska': {'Latitude': 40.820744, 'Longitude': -96.70047, 'Rank': 10, 'Wins': 12, 'Conf_wins': 7},
    'Nevada': {'Latitude': 39.542944, 'Longitude': -119.815582, 'Rank': 46, 'Wins': 3, 'Conf_wins': 2},
    'NevadaLasVegas': {'Latitude': 36.107308, 'Longitude': -115.143755, 'Rank': 84, 'Wins': 3, 'Conf_wins': 1},
    'NewMexico': {'Latitude': 35.084289, 'Longitude': -106.619873, 'Rank': 82, 'Wins': 4, 'Conf_wins': 3},
    'NewMexicoState': {'Latitude': 32.279044, 'Longitude': -106.749096, 'Rank': 43, 'Wins': 6, 'Conf_wins': 3},
    'NorthCarolina': {'Latitude': 35.904973, 'Longitude': -79.047128, 'Rank': 9, 'Wins': 3, 'Conf_wins': 2},
    'NorthCarolinaState': {'Latitude': 35.78462, 'Longitude': -78.682277, 'Rank': 6, 'Wins': 6, 'Conf_wins': 3},
    'NorthTexas': {'Latitude': 33.207397, 'Longitude': -97.152722, 'Rank': 47, 'Wins': 2, 'Conf_wins': 1},
    'NorthernIllinois': {'Latitude': 41.934952, 'Longitude': -88.773479, 'Rank': 73, 'Wins': 5, 'Conf_wins': 5},
    'Northwestern': {'Latitude': 42.056446, 'Longitude': -87.675305, 'Rank': 39, 'Wins': 3, 'Conf_wins': 1},
    'NotreDame': {'Latitude': 41.69928, 'Longitude': -86.238899, 'Rank': 60, 'Wins': 5, 'Conf_wins': 0},
    'Ohio': {'Latitude': 39.32444, 'Longitude': -82.101163, 'Rank': 67, 'Wins': 5, 'Conf_wins': 5},
    'OhioState': {'Latitude': 40.006845, 'Longitude': -83.030194, 'Rank': 37, 'Wins': 6, 'Conf_wins': 3},
    'Oklahoma': {'Latitude': 35.205894, 'Longitude': -97.445717, 'Rank': 18, 'Wins': 7, 'Conf_wins': 5},
    'OklahomaState': {'Latitude': 36.127326, 'Longitude': -97.073649, 'Rank': 20, 'Wins': 5, 'Conf_wins': 3},
    'Oregon': {'Latitude': 44.044819, 'Longitude': -123.072593, 'Rank': 86, 'Wins': 9, 'Conf_wins': 6},
    'OregonState': {'Latitude': 44.563713, 'Longitude': -123.279478, 'Rank': 89, 'Wins': 7, 'Conf_wins': 4},
    'PennState': {'Latitude': 40.798214, 'Longitude': -77.859909, 'Rank': 33, 'Wins': 10, 'Conf_wins': 5},
    'Pittsburgh': {'Latitude': 40.444271, 'Longitude': -79.960914, 'Rank': 27, 'Wins': 5, 'Conf_wins': 2},
    'Purdue': {'Latitude': 40.423331, 'Longitude': -86.921044, 'Rank': 36, 'Wins': 7, 'Conf_wins': 4},
    'Rice': {'Latitude': 29.717154, 'Longitude': -95.404182, 'Rank': 110, 'Wins': 5, 'Conf_wins': 4},
    'Rutgers': {'Latitude': 40.741713, 'Longitude': -74.174393, 'Rank': 29, 'Wins': 1, 'Conf_wins': 1},
    'SanDiegoState': {'Latitude': 32.774799, 'Longitude': -117.071869, 'Rank': 81, 'Wins': 5, 'Conf_wins': 3},
    'SanJoseState': {'Latitude': 37.334744, 'Longitude': -121.880932, 'Rank': 113, 'Wins': 3, 'Conf_wins': 1},
    'SouthCarolina': {'Latitude': 33.993428, 'Longitude': -81.029972, 'Rank': 100, 'Wins': 0, 'Conf_wins': 0},
    'SouthernCalifornia': {'Latitude': 34.022415, 'Longitude': -118.28553, 'Rank': 91, 'Wins': 6, 'Conf_wins': 3},
    'SouthernMethodist': {'Latitude': 32.840623, 'Longitude': -96.785097, 'Rank': 111, 'Wins': 4, 'Conf_wins': 3},
    'SouthernMississippi': {'Latitude': 31.329445, 'Longitude': -89.333121, 'Rank': 48, 'Wins': 9, 'Conf_wins': 6},
    'Stanford': {'Latitude': 37.42823, 'Longitude': -122.168861, 'Rank': 85, 'Wins': 8, 'Conf_wins': 7},
    'Syracuse': {'Latitude': 43.039365, 'Longitude': -76.135223, 'Rank': 25, 'Wins': 7, 'Conf_wins': 3},
    'Temple': {'Latitude': 39.981143, 'Longitude': -75.155393, 'Rank': 28, 'Wins': 2, 'Conf_wins': 2},
    'Tennessee': {'Latitude': 35.954331, 'Longitude': -83.929627, 'Rank': 96, 'Wins': 9, 'Conf_wins': 6},
    'Texas': {'Latitude': 30.284851, 'Longitude': -97.733988, 'Rank': 16, 'Wins': 9, 'Conf_wins': 6},
    'TexasA&M': {'Latitude': 30.615011, 'Longitude': -96.342476, 'Rank': 17, 'Wins': 8, 'Conf_wins': 5},
    'TexasChristian': {'Latitude': 32.707823, 'Longitude': -97.362845, 'Rank': 108, 'Wins': 8, 'Conf_wins': 5},
    'TexasElPaso': {'Latitude': 31.773088, 'Longitude': -106.50573, 'Rank': 112, 'Wins': 5, 'Conf_wins': 3},
    'TexasTech': {'Latitude': 33.579166, 'Longitude': -101.886909, 'Rank': 19, 'Wins': 6, 'Conf_wins': 5},
    'Toledo': {'Latitude': 41.657567, 'Longitude': -83.613857, 'Rank': 72, 'Wins': 6, 'Conf_wins': 5},
    'Tulane': {'Latitude': 29.940446, 'Longitude': -90.120071, 'Rank': 55, 'Wins': 3, 'Conf_wins': 1},
    'Tulsa': {'Latitude': 36.151898, 'Longitude': -95.944646, 'Rank': 114, 'Wins': 2, 'Conf_wins': 1},
    'UCLA': {'Latitude': 34.068886, 'Longitude': -118.445217, 'Rank': 93, 'Wins': 4, 'Conf_wins': 2},
    'Utah': {'Latitude': 40.764872, 'Longitude': -111.842281, 'Rank': 77, 'Wins': 9, 'Conf_wins': 5},
    'UtahState': {'Latitude': 41.745065, 'Longitude': -111.810536, 'Rank': 44, 'Wins': 4, 'Conf_wins': 3},
    'Vanderbilt': {'Latitude': 36.144051, 'Longitude': -86.800949, 'Rank': 99, 'Wins': 5, 'Conf_wins': 2},
    'Virginia': {'Latitude': 38.033554, 'Longitude': -78.50798, 'Rank': 3, 'Wins': 7, 'Conf_wins': 5},
    'VirginiaTech': {'Latitude': 37.228149, 'Longitude': -80.423163, 'Rank': 22, 'Wins': 11, 'Conf_wins': 7},
    'WakeForest': {'Latitude': 36.135232, 'Longitude': -80.279291, 'Rank': 5, 'Wins': 7, 'Conf_wins': 3},
    'Washington': {'Latitude': 47.655548, 'Longitude': -122.3032, 'Rank': 87, 'Wins': 7, 'Conf_wins': 6},
    'WashingtonState': {'Latitude': 46.731803, 'Longitude': -117.15442, 'Rank': 94, 'Wins': 3, 'Conf_wins': 1},
    'WestVirginia': {'Latitude': 39.648067, 'Longitude': -79.969881, 'Rank': 26, 'Wins': 4, 'Conf_wins': 3},
    'WesternMichigan': {'Latitude': 42.283771, 'Longitude': -85.610271, 'Rank': 71, 'Wins': 7, 'Conf_wins': 6},
    'Wisconsin': {'Latitude': 43.07651, 'Longitude': -89.412508, 'Rank': 30, 'Wins': 10, 'Conf_wins': 7},
    'Wyoming': {'Latitude': 41.31469, 'Longitude': -105.566624, 'Rank': 80, 'Wins': 7, 'Conf_wins': 4},
}

# Left-join the features onto the node table by school name; schools without
# an entry in node_data keep their row and get NaN features.
node_df = pd.merge(
    node_df,
    pd.DataFrame(node_data).T,  # transpose so that schools become the rows
    how='left',
    left_index=True,
    right_index=True,
)
node_df

conference Latitude Longitude Rank Wins Conf_wins

school

AirForce 7 38.996093 -104.857222 83.0 6.0 2.0

Akron 6 41.075346 -81.512184 66.0 7.0 5.0

Alabama 9 33.215775 -87.538261 101.0 10.0 7.0

AlabamaBirmingham 4 33.501569 -86.805677 52.0 5.0 4.0

Arizona 8 32.248814 -110.987419 90.0 6.0 3.0

... ... ... ... ... ... ...

WashingtonState 8 46.731803 -117.154420 94.0 3.0 1.0

WestVirginia 1 39.648067 -79.969881 26.0 4.0 3.0

WesternMichigan 6 42.283771 -85.610271 71.0 7.0 6.0

Wisconsin 2 43.076510 -89.412508 30.0 10.0 7.0

Wyoming 7 41.314690 -105.566624 80.0 7.0 4.0

115 rows × 6 columns

Add conference dummy variables for prediction

# One-hot encode the conference into conf_<id> columns while keeping the
# original 'conference' column (get_dummies consumes the copied 'conf' column).
# dtype=int keeps the indicators as 0/1 integers on every pandas version;
# newer releases would otherwise produce booleans.
node_df = pd.get_dummies(
    node_df.assign(conf=node_df['conference']), columns=['conf'], dtype=int
)
node_df

https://colab.research.google.com/drive/1Z7IzlIFd0cP9t3wwwJz5g7IzP2mHBr4o#printMode=true 4/8
11/26/23, 11:39 PM GNN-Football - Colaboratory

conference Latitude Longitude Rank Wins Conf_wins conf_0 conf_1 conf_2 conf_3 conf_4 conf_5 conf

school

AirForce 7 38.996093 -104.857222 83.0 6.0 2.0 0 0 0 0 0 0

Akron 6 41.075346 -81.512184 66.0 7.0 5.0 0 0 0 0 0 0

Alabama 9 33.215775 -87.538261 101.0 10.0 7.0 0 0 0 0 0 0

AlabamaBirmingham 4 33.501569 -86.805677 52.0 5.0 4.0 0 0 0 0 1 0

Arizona 8 32.248814 -110.987419 90.0 6.0 3.0 0 0 0 0 0 0

... ... ... ... ... ... ... ... ... ... ... ... ...

WashingtonState 8 46.731803 -117.154420 94.0 3.0 1.0 0 0 0 0 0 0

WestVirginia 1 39.648067 -79.969881 26.0 4.0 3.0 0 1 0 0 0 0

WesternMichigan 6 42.283771 -85.610271 71.0 7.0 6.0 0 0 0 0 0 0

Wisconsin 2 43.076510 -89.412508 30.0 10.0 7.0 0 0 1 0 0 0

Wyoming 7 41.314690 -105.566624 80.0 7.0 4.0 0 0 0 0 0 0

115 rows × 18 columns


Add edge features
Add the Euclidean distance between schools, computed directly on their latitude/longitude coordinates (so the value is in degrees, not miles or kilometres).

def euclidean_dist(edge_df, node_df):
    """Return a copy of ``edge_df`` with an added ``euclidean_dist`` column.

    The distance is the straight-line distance between the 'Latitude' /
    'Longitude' values of each edge's 'source' and 'target' school, looked up
    in ``node_df`` by its index. The input frames are not modified. Edges
    whose endpoint is missing from ``node_df`` get NaN (left joins).
    """
    coords = node_df[['Latitude', 'Longitude']]
    helper_cols = ['src_lat', 'src_lon', 'trg_lat', 'trg_lon']

    # Attach source and target coordinates under temporary column names.
    merged = pd.merge(
        edge_df,
        coords.rename(columns={'Latitude': 'src_lat', 'Longitude': 'src_lon'}),
        how='left',
        left_on='source',
        right_index=True,
    )
    merged = pd.merge(
        merged,
        coords.rename(columns={'Latitude': 'trg_lat', 'Longitude': 'trg_lon'}),
        how='left',
        left_on='target',
        right_index=True,
    )

    d_lat = merged['trg_lat'] - merged['src_lat']
    d_lon = merged['trg_lon'] - merged['src_lon']
    merged['euclidean_dist'] = (d_lat ** 2 + d_lon ** 2) ** 0.5

    # The temporary coordinate columns are not part of the result.
    return merged.drop(columns=helper_cols)

# Add the distance feature to every edge and display the result.
edge_df = euclidean_dist(edge_df, node_df)
edge_df

source target euclidean_dist

0 BrighamYoung FloridaState 29.056695

1 BrighamYoung NewMexico 7.210711

2 BrighamYoung SanDiegoState 9.236102

3 BrighamYoung Wyoming 6.174841

4 BrighamYoung Utah 0.548490

... ... ... ...

608 Clemson Maryland 7.304303

609 NevadaLasVegas Hawaii 43.176232

610 WakeForest Maryland 4.389546

611 OregonState California 6.769309

612 TexasChristian Hawaii 59.165743

613 rows × 3 columns

Add name similarity score

https://colab.research.google.com/drive/1Z7IzlIFd0cP9t3wwwJz5g7IzP2mHBr4o#printMode=true 5/8
11/26/23, 11:39 PM GNN-Football - Colaboratory
def sim_metric(df, col1, col2):
    """Return the case-insensitive similarity ratio of two fields of ``df``.

    ``df`` is anything indexable by ``col1`` / ``col2`` (here: one edge row).
    Both values are converted to lower-case strings and compared with
    ``difflib.SequenceMatcher``; the result is a float in [0, 1].
    """
    left = str(df[col1]).lower()
    right = str(df[col2]).lower()
    return SequenceMatcher(None, left, right).ratio()

# Score every edge by how similar the two school names are (row-wise apply).
edge_df['name_sim_score'] = edge_df.apply(
    sim_metric, args=('source', 'target'), axis=1
)

# Spot-check: all edges whose source is Washington.
edge_df.loc[edge_df['source'] == "Washington", :]

source target euclidean_dist name_sim_score

410 Washington Oregon 3.691792 0.375000

411 Washington Stanford 10.228200 0.333333

412 Washington WashingtonState 5.230988 0.800000

413 Washington MiamiFlorida 47.406820 0.272727

414 Washington OregonState 3.242308 0.285714

415 Washington California 9.783797 0.300000

Add conference game edge target variable

# Target variable: 1 when both schools of an edge belong to the same
# conference, 0 otherwise (also 0 when either conference is unknown).
# Looking the conference up by school name replaces the two merges and the
# temporary conf_source / conf_target columns; it relies on school names
# being unique in node_df's index.
conference = node_df['conference']
same_conference = (
    edge_df['source'].map(conference) == edge_df['target'].map(conference)
)
edge_df = edge_df.assign(conference_game=same_conference.astype('int64'))
edge_df

source target euclidean_dist name_sim_score conference_game

0 BrighamYoung FloridaState 29.056695 0.250000 0

1 BrighamYoung NewMexico 7.210711 0.190476 1

2 BrighamYoung SanDiegoState 9.236102 0.240000 1

3 BrighamYoung Wyoming 6.174841 0.421053 1

4 BrighamYoung Utah 0.548490 0.125000 1

... ... ... ... ... ...

608 Clemson Maryland 7.304303 0.266667 1

609 NevadaLasVegas Hawaii 43.176232 0.200000 0

610 WakeForest Maryland 4.389546 0.222222 1

611 OregonState California 6.769309 0.380952 1

612 TexasChristian Hawaii 59.165743 0.300000 0

613 rows × 5 columns

Draw Data On Map

https://colab.research.google.com/drive/1Z7IzlIFd0cP9t3wwwJz5g7IzP2mHBr4o#printMode=true 6/8
11/26/23, 11:39 PM GNN-Football - Colaboratory
def draw_map(graph, edge_df, node_df, nodes='all', size=(12, 9)):
    """Draw the schools and their games on a Mercator map.

    Parameters
    ----------
    graph : networkx graph whose nodes carry a 'value' attribute (conference
        id, used to pick the node colour).
    edge_df : DataFrame with 'source', 'target', 'euclidean_dist' and
        'conference_game' columns.
    node_df : DataFrame indexed by school name with 'Latitude' and
        'Longitude' columns.
    nodes : the string 'all' (default) to draw every node of ``graph``, or an
        iterable of node names to draw.
    size : figure size as (width, height).

    Edges with ``euclidean_dist`` <= 0.5 are not drawn; conference games are
    drawn in '#E75132' and all other games in '#69b3a2'.
    Note that this changes the global ``plt.rcParams["figure.figsize"]``.
    """
    plt.rcParams["figure.figsize"] = list(size)
    # NOTE(review): the printed source is cut off after `suppress_ti`; the
    # call is restored here as suppress_ticks=True -- confirm against the
    # original notebook.
    m = Basemap(projection='merc', llcrnrlon=-127, llcrnrlat=23,
                urcrnrlon=-65, urcrnrlat=50, lat_ts=0, resolution='l',
                suppress_ticks=True)
    m.drawmapboundary(fill_color='#A6CAE0', linewidth=1)
    m.fillcontinents(color='#F1ECEB', alpha=0.7, lake_color='#76B6D8')
    m.drawcoastlines(linewidth=0.1, color="white")
    m.drawcountries(linewidth=0.5)
    m.drawstates(linewidth=0.1)

    # Attach source and target coordinates to every edge.
    coords = node_df[['Latitude', 'Longitude']]
    map_df = pd.merge(
        edge_df,
        coords.rename(columns={'Latitude': 'src_lat', 'Longitude': 'src_lon'}),
        how='left', left_on='source', right_index=True)
    map_df = pd.merge(
        map_df,
        coords.rename(columns={'Latitude': 'trg_lat', 'Longitude': 'trg_lon'}),
        how='left', left_on='target', right_index=True)

    for row in map_df.itertuples(index=False):
        if row.euclidean_dist > 0.5:
            color = '#E75132' if row.conference_game == 1 else '#69b3a2'
            m.drawgreatcircle(row.src_lon, row.src_lat,
                              row.trg_lon, row.trg_lat,
                              linewidth=0.5, color=color)

    # Project lon/lat to map coordinates; one (x, y) position per school.
    mx, my = m(node_df['Longitude'], node_df['Latitude'])
    location = dict(zip(node_df.index, np.column_stack((mx, my))))

    cmap = {0: '#bd2309', 1: '#bbb12d', 2: '#1480fa', 3: '#14fa2f',
            4: '#faf214', 5: '#2edfea', 6: '#ea2ec4', 7: '#ea2e40',
            8: '#577a4d', 9: '#2e46c0', 10: '#f59422', 11: '#8086d9'}

    # Only treat the literal string 'all' as the sentinel, so that list or
    # array arguments are never compared element-wise.
    if isinstance(nodes, str) and nodes == 'all':
        nodes = graph.nodes()

    colors = [cmap[graph.nodes[n]['value']] for n in nodes]
    nx.draw_networkx_nodes(graph, location, nodelist=nodes,
                           node_color=colors, node_size=50)
    plt.show()


draw_map(G, edge_df, node_df, nodes='all')

GRAPH

https://colab.research.google.com/drive/1Z7IzlIFd0cP9t3wwwJz5g7IzP2mHBr4o#printMode=true 7/8
11/26/23, 11:39 PM GNN-Football - Colaboratory

Train/Test Splits

# Hold out 15% of the schools; the fixed seed keeps the split reproducible.
node_train, node_test = train_test_split(
    node_df, test_size=0.15, random_state=42
)

# An edge belongs to the test set as soon as either endpoint is a test node;
# the training edges are exactly the remaining ones.
touches_test = (
    edge_df['source'].isin(node_test.index)
    | edge_df['target'].isin(node_test.index)
)
edge_train = edge_df.loc[~touches_test]
edge_test = edge_df.loc[touches_test]

Add Bidirectional Duplication & Source-Target Index

def bidirectional(edge_df):
    """Return ``edge_df`` followed by a copy with source and target swapped.

    The result has the same columns in the same order, twice as many rows,
    and a fresh 0..n-1 index. All other columns are duplicated unchanged.
    """
    swapped = edge_df.assign(
        source=edge_df['target'], target=edge_df['source']
    )
    return pd.concat([edge_df, swapped], ignore_index=True, axis=0)

def create_adj_id(node_df, edge_df):
    """Assign positional integer ids to nodes and attach them to the edges.

    ``node_df`` must be indexed by an index named 'school'. It is returned
    with that index turned into a 'school' column and a new 'index' column
    holding the row position (0..n-1). ``edge_df`` is returned with
    'source_id' and 'target_id' columns looked up from those positions.

    Rows containing any missing value are dropped, which includes edges whose
    source or target is not present in ``node_df``. The id columns are cast
    back to integers afterwards, because a left join with unmatched rows
    turns them into floats.
    """
    # First reset moves 'school' into a column, second one adds the row
    # position as a column named 'index'.
    node_df = node_df.reset_index().reset_index()
    ids = node_df[['school', 'index']]

    for endpoint in ('source', 'target'):
        edge_df = pd.merge(
            edge_df,
            ids.rename(columns={'index': endpoint + '_id'}),
            how='left',
            left_on=endpoint,
            right_on='school',
        ).drop(columns=['school'])

    edge_df = edge_df.dropna().astype(
        {'source_id': 'int64', 'target_id': 'int64'}
    )
    return node_df, edge_df

# Duplicate every edge in both directions, for the full graph and for the
# training subgraph.
edge_full_adj = bidirectional(edge_df)
edge_train_adj = bidirectional(edge_train)

# Attach positional node ids; the training ids are positions within
# node_train, not within the full node table.
node_full_adj, edge_full_adj = create_adj_id(node_df, edge_full_adj)
node_train_adj, edge_train_adj = create_adj_id(node_train, edge_train_adj)

1 import numpy as np

# Feature arrays for the graph's node set: every scalar feature is stored as
# an (n_nodes, 1) column, the one-hot conference as one (n_nodes, k) block.
# The dummy columns are selected by their 'conf_' prefix rather than by
# position, so adding columns to node_df later cannot silently shift them
# ('conference' and 'Conf_wins' do not match the prefix).
conference_cols = [c for c in node_df.columns if str(c).startswith('conf_')]

node_sets = {
    'sizes': [len(node_df)],
    'features': {
        'Latitude': np.array(node_df['Latitude'], dtype='float32').reshape(-1, 1),
        'Longitude': np.array(node_df['Longitude'], dtype='float32').reshape(-1, 1),
        'Rank': np.array(node_df['Rank'], dtype='int32').reshape(-1, 1),
        'Wins': np.array(node_df['Wins'], dtype='int32').reshape(-1, 1),
        'Conf_wins': np.array(node_df['Conf_wins'], dtype='int32').reshape(-1, 1),
        'conference': np.array(node_df[conference_cols], dtype='int32'),
    },
}

# Feature arrays for the edge set, each stored as an (n_edges, 1) column.
edge_sets = {
    'sizes': [len(edge_df)],
    'features': {
        'name_sim_score': np.array(edge_df['name_sim_score'], dtype='float32').reshape(-1, 1),
        'euclidean_dist': np.array(edge_df['euclidean_dist'], dtype='float32').reshape(-1, 1),
        'conference_game': np.array(edge_df['conference_game'], dtype='int32').reshape(-1, 1),
    },
    # adjacency = tfgnn.Adjacency.from_indices(
    #     source = ("schools", np.array(edge_df['source_id'], dtype='int32')),
    #     target = ("schools", np.array(edge_df['target_id'], dtype='int32')),
    # )),
}

# Sanity check: the declared size and the feature lengths should agree.
print(node_sets['sizes'])
for feature in ('Latitude', 'Longitude'):
    print(len(node_sets['features'][feature]))

[115]
115
115

https://colab.research.google.com/drive/1Z7IzlIFd0cP9t3wwwJz5g7IzP2mHBr4o#printMode=true 8/8

You might also like