
Optimising Network Routing in the Amazon Dataset using Q-Learning: A Reinforcement Learning Approach

Machine Learning Mini Project
Guided By: Prof. Devashri Mehta

Project By:
1. Ansh Rajput (200670107002), Semester 7, Div A
2. Navin Shripathi (200670107044), Semester 7, Div A

Dataset Name: Amazon product co-purchasing network and ground-truth communities

Classifier Name: Q-Learning Agent (Reinforcement Learning)
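The agent implemented below applies the standard tabular Q-learning update, maintained separately for each service level (the third dimension of the Q-table), together with an epsilon-greedy policy whose exploration probability decays exponentially from 1.0 to 0.1:

Q(s, a) <- Q(s, a) + alpha * ( r + gamma * max_a' Q(s', a') - Q(s, a) )

where alpha is the learning rate (0.05), gamma is the discount factor (0.9), r is the latency-based (negative) reward, and s' is the next node reached by taking action a.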

Code:

# 1. Imports and Setup


import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

graph = nx.DiGraph()

# Constants
learning_rate = 0.05
discount_factor = 0.9
initial_exploration_prob = 1.0
final_exploration_prob = 0.1
exploration_decay = 0.002
num_episodes = 2000
num_service_levels = 3

# 2. Loading the Dataset


with open("/content/sample_data/amazon0302.txt", 'r') as file:
    data = file.readlines()

# 3. Data Preprocessing
# Each non-comment line of the SNAP edge list has the form "FromNodeId<TAB>ToNodeId"
for line in data:
    if not line.startswith("#"):
        split_line = line.strip().split("\t")
        if len(split_line) == 2:
            from_node, to_node = map(int, split_line)
            graph.add_edge(from_node, to_node)
        else:
            print(f"Skipping line: {line.strip()}")

# Restrict the experiment to a small subgraph (first 20 nodes) to keep the state space tractable
nodes_subset = list(graph.nodes())[:20]
amazon_subgraph = graph.subgraph(nodes_subset)
# 4. Defining the Environment and Agent Classes
class NetworkTopology:
    def __init__(self, graph):
        self.graph = graph
        self.nodes = list(graph.nodes())
        # Unweighted adjacency matrix used as a latency proxy (1 if an edge exists, else 0)
        self.latencies = nx.adjacency_matrix(graph).todense()

    def get_latency(self, node1, node2):
        idx1 = self.nodes.index(node1)
        idx2 = self.nodes.index(node2)
        return self.latencies[idx1, idx2]

class NodeProperties:
    def __init__(self, graph_nodes):
        # Random per-node cost multipliers, one column per service level
        self.properties = np.random.rand(len(graph_nodes), num_service_levels)

class RoutingEnvironment:
    def __init__(self, graph, network_topology, node_properties):
        self.graph = graph
        self.nodes = list(graph.nodes())
        self.network_topology = network_topology
        self.node_properties = node_properties

    def reset(self):
        # Start each episode from a random node
        return np.random.choice(self.nodes)

    def take_action(self, state, action, service_level):
        next_state = action
        reward = self.calculate_reward(state, next_state, service_level)
        return next_state, reward

    def calculate_reward(self, state, next_state, service_level):
        # Negative reward: higher latency and higher node cost are penalised
        latency = self.network_topology.get_latency(state, next_state)
        idx = self.nodes.index(next_state)
        reward = -latency * self.node_properties.properties[idx, service_level]
        return reward

    def is_terminal(self, state):
        # target_node is defined globally before training begins
        return state == target_node

class EnhancedStableQLearningAgent:
    def __init__(self, nodes, num_states, num_actions, num_service_levels,
                 learning_rate, discount_factor, initial_exploration_prob,
                 final_exploration_prob, exploration_decay):
        self.nodes = nodes
        self.num_states = num_states
        self.num_actions = num_actions
        self.num_service_levels = num_service_levels
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.initial_exploration_prob = initial_exploration_prob
        self.final_exploration_prob = final_exploration_prob
        self.exploration_prob = initial_exploration_prob
        self.exploration_decay = exploration_decay
        self.q_table = np.zeros((num_states, num_actions, num_service_levels))

    def choose_action(self, state, service_level):
        # Epsilon-greedy action selection
        if np.random.uniform(0, 1) < self.exploration_prob:
            return np.random.choice(self.nodes)
        else:
            state_idx = self.nodes.index(state)
            return self.nodes[np.argmax(self.q_table[state_idx, :, service_level])]

    def update_q_table(self, state, action, service_level, reward, next_state):
        # Standard Q-learning update for the given service level
        state_idx = self.nodes.index(state)
        action_idx = self.nodes.index(action)
        next_state_idx = self.nodes.index(next_state)
        best_next_action_idx = np.argmax(self.q_table[next_state_idx, :, service_level])
        self.q_table[state_idx, action_idx, service_level] += self.learning_rate * (
            reward
            + self.discount_factor * self.q_table[next_state_idx, best_next_action_idx, service_level]
            - self.q_table[state_idx, action_idx, service_level]
        )

    def update_exploration_prob(self, episode):
        # Exponential decay from the initial to the final exploration probability
        self.exploration_prob = self.final_exploration_prob + (
            self.initial_exploration_prob - self.final_exploration_prob
        ) * np.exp(-self.exploration_decay * episode)
# 5. Training and Testing the Agent
num_nodes = len(amazon_subgraph.nodes())
source_node = list(amazon_subgraph.nodes())[0]
target_node = list(amazon_subgraph.nodes())[-1]
num_states = num_nodes * num_service_levels
num_actions = num_nodes

network_topology = NetworkTopology(amazon_subgraph)
node_properties = NodeProperties(amazon_subgraph.nodes())
environment = RoutingEnvironment(amazon_subgraph, network_topology, node_properties)
improved_stable_agent = EnhancedStableQLearningAgent(
    list(amazon_subgraph.nodes()), num_states, num_actions, num_service_levels,
    learning_rate, discount_factor, initial_exploration_prob,
    final_exploration_prob, exploration_decay)

for episode in range(num_episodes):
    state = environment.reset()
    service_level = np.random.randint(0, num_service_levels)
    while not environment.is_terminal(state):
        action = improved_stable_agent.choose_action(state, service_level)
        next_state, reward = environment.take_action(state, action, service_level)
        improved_stable_agent.update_q_table(state, action, service_level, reward, next_state)
        improved_stable_agent.update_exploration_prob(episode)
        state = next_state
        service_level = np.random.randint(0, num_service_levels)

# Test the Enhanced Stable RL-based learned policy
improved_stable_rl_path = [source_node]
improved_stable_rl_total_latency = 0
state = source_node
service_level = 0
while state != target_node:
    action = improved_stable_agent.choose_action(state, service_level)
    next_state, _ = environment.take_action(state, action, service_level)
    latency = network_topology.get_latency(state, next_state)
    idx = network_topology.nodes.index(next_state)
    improved_stable_rl_total_latency += latency * node_properties.properties[idx, service_level]
    improved_stable_rl_path.append(next_state)
    state = next_state
    service_level = np.random.randint(0, num_service_levels)

# Calculate shortest path using Dijkstra's algorithm
dijkstra_path = nx.shortest_path(amazon_subgraph, source=source_node,
                                 target=target_node)
dijkstra_total_latency = sum(amazon_subgraph[i][j].get('weight', 1)
                             for i, j in zip(dijkstra_path, dijkstra_path[1:]))

# Visualize the network
plt.figure(figsize=(10, 6))
pos = nx.spring_layout(amazon_subgraph)
nx.draw(amazon_subgraph, pos, with_labels=True, node_size=500,
        node_color='skyblue')
plt.title('Amazon Subgraph Network Topology')
plt.show()

# Total latency comparison: Dijkstra's shortest path vs. the learned RL policy
(dijkstra_total_latency, improved_stable_rl_total_latency)
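The listing above only draws the network topology (Figure 1); the comparison shown in Figure 2 is not produced by the code given. A minimal sketch of how such a bar chart could be generated with matplotlib, reusing the two totals computed above (the plotting itself is our assumption, not part of the original script):

# Sketch (assumption): bar chart comparing the two total latencies
plt.figure(figsize=(6, 4))
plt.bar(['Dijkstra', 'Q-Learning policy'],
        [dijkstra_total_latency, improved_stable_rl_total_latency],
        color=['steelblue', 'orange'])
plt.ylabel('Total path latency')
plt.title("Dijkstra's versus Our Algorithm")
plt.show()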
Output:

Figure 1: Amazon Network Topology

Figure 2: Dijkstra's versus Our Algorithm
