
Optimising Network Routing in the Amazon Dataset using Q-Learning: A Reinforcement Learning Approach

Machine Learning Mini Project
Guided By: Prof. Devashri Mehta

Project By:
1. Ansh Rajput (200670107002), Semester 7, Div A
2. Navin Shripathi (200670107044), Semester 7, Div A

Dataset Name: Amazon product co-purchasing network and ground-truth communities

Classifier Name: Q-Learning Agent (Reinforcement Learning)
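The agent implemented below applies the standard tabular Q-learning update, maintained separately for each service level (the third dimension of the Q-table), together with an epsilon-greedy policy whose exploration probability decays exponentially from 1.0 to 0.1:

Q(s, a) <- Q(s, a) + alpha * ( r + gamma * max_a' Q(s', a') - Q(s, a) )

where alpha is the learning rate (0.05), gamma is the discount factor (0.9), r is the latency-based (negative) reward, and s' is the next node reached by taking action a.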

Code:

# 1. Imports and Setup


import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

graph = nx.DiGraph()

# Constants
learning_rate = 0.05
discount_factor = 0.9
initial_exploration_prob = 1.0
final_exploration_prob = 0.1
exploration_decay = 0.002
num_episodes = 2000
num_service_levels = 3

# 2. Loading the Dataset


with open("/content/sample_data/amazon0302.txt", 'r') as file:
    data = file.readlines()

# 3. Data Preprocessing
# Each non-comment line of the SNAP edge list has the form "FromNodeId<TAB>ToNodeId"
for line in data:
    if not line.startswith("#"):
        split_line = line.strip().split("\t")
        if len(split_line) == 2:
            from_node, to_node = map(int, split_line)
            graph.add_edge(from_node, to_node)
        else:
            print(f"Skipping line: {line.strip()}")

# Restrict the experiment to a small subgraph (first 20 nodes) to keep the state space tractable
nodes_subset = list(graph.nodes())[:20]
amazon_subgraph = graph.subgraph(nodes_subset)
# 4. Defining the Environment and Agent Classes
class NetworkTopology:
    def __init__(self, graph):
        self.graph = graph
        self.nodes = list(graph.nodes())
        # Unweighted adjacency matrix used as a latency proxy (1 if an edge exists, else 0)
        self.latencies = nx.adjacency_matrix(graph).todense()

    def get_latency(self, node1, node2):
        idx1 = self.nodes.index(node1)
        idx2 = self.nodes.index(node2)
        return self.latencies[idx1, idx2]

class NodeProperties:
    def __init__(self, graph_nodes):
        # Random per-node cost multipliers, one column per service level
        self.properties = np.random.rand(len(graph_nodes), num_service_levels)

class RoutingEnvironment:
    def __init__(self, graph, network_topology, node_properties):
        self.graph = graph
        self.nodes = list(graph.nodes())
        self.network_topology = network_topology
        self.node_properties = node_properties

    def reset(self):
        # Start each episode from a random node
        return np.random.choice(self.nodes)

    def take_action(self, state, action, service_level):
        next_state = action
        reward = self.calculate_reward(state, next_state, service_level)
        return next_state, reward

    def calculate_reward(self, state, next_state, service_level):
        # Negative reward: higher latency and higher node cost are penalised
        latency = self.network_topology.get_latency(state, next_state)
        idx = self.nodes.index(next_state)
        reward = -latency * self.node_properties.properties[idx, service_level]
        return reward

    def is_terminal(self, state):
        # target_node is defined globally before training begins
        return state == target_node

class EnhancedStableQLearningAgent:
    def __init__(self, nodes, num_states, num_actions, num_service_levels,
                 learning_rate, discount_factor, initial_exploration_prob,
                 final_exploration_prob, exploration_decay):
        self.nodes = nodes
        self.num_states = num_states
        self.num_actions = num_actions
        self.num_service_levels = num_service_levels
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.initial_exploration_prob = initial_exploration_prob
        self.final_exploration_prob = final_exploration_prob
        self.exploration_prob = initial_exploration_prob
        self.exploration_decay = exploration_decay
        self.q_table = np.zeros((num_states, num_actions, num_service_levels))

    def choose_action(self, state, service_level):
        # Epsilon-greedy action selection
        if np.random.uniform(0, 1) < self.exploration_prob:
            return np.random.choice(self.nodes)
        else:
            state_idx = self.nodes.index(state)
            return self.nodes[np.argmax(self.q_table[state_idx, :, service_level])]

    def update_q_table(self, state, action, service_level, reward, next_state):
        # Standard Q-learning update for the given service level
        state_idx = self.nodes.index(state)
        action_idx = self.nodes.index(action)
        next_state_idx = self.nodes.index(next_state)
        best_next_action_idx = np.argmax(self.q_table[next_state_idx, :, service_level])
        self.q_table[state_idx, action_idx, service_level] += self.learning_rate * (
            reward
            + self.discount_factor * self.q_table[next_state_idx, best_next_action_idx, service_level]
            - self.q_table[state_idx, action_idx, service_level]
        )

    def update_exploration_prob(self, episode):
        # Exponential decay from the initial to the final exploration probability
        self.exploration_prob = self.final_exploration_prob + (
            self.initial_exploration_prob - self.final_exploration_prob
        ) * np.exp(-self.exploration_decay * episode)
# 5. Training and Testing the Agent
num_nodes = len(amazon_subgraph.nodes())
source_node = list(amazon_subgraph.nodes())[0]
target_node = list(amazon_subgraph.nodes())[-1]
num_states = num_nodes * num_service_levels
num_actions = num_nodes

network_topology = NetworkTopology(amazon_subgraph)
node_properties = NodeProperties(amazon_subgraph.nodes())
environment = RoutingEnvironment(amazon_subgraph, network_topology, node_properties)
improved_stable_agent = EnhancedStableQLearningAgent(
    list(amazon_subgraph.nodes()), num_states, num_actions, num_service_levels,
    learning_rate, discount_factor, initial_exploration_prob,
    final_exploration_prob, exploration_decay)

for episode in range(num_episodes):
    state = environment.reset()
    service_level = np.random.randint(0, num_service_levels)
    while not environment.is_terminal(state):
        action = improved_stable_agent.choose_action(state, service_level)
        next_state, reward = environment.take_action(state, action, service_level)
        improved_stable_agent.update_q_table(state, action, service_level, reward, next_state)
        improved_stable_agent.update_exploration_prob(episode)
        state = next_state
        service_level = np.random.randint(0, num_service_levels)

# Test the Enhanced Stable RL-based learned policy
improved_stable_rl_path = [source_node]
improved_stable_rl_total_latency = 0
state = source_node
service_level = 0
while state != target_node:
    action = improved_stable_agent.choose_action(state, service_level)
    next_state, _ = environment.take_action(state, action, service_level)
    latency = network_topology.get_latency(state, next_state)
    idx = network_topology.nodes.index(next_state)
    improved_stable_rl_total_latency += latency * node_properties.properties[idx, service_level]
    improved_stable_rl_path.append(next_state)
    state = next_state
    service_level = np.random.randint(0, num_service_levels)

# Calculate shortest path using Dijkstra's algorithm
dijkstra_path = nx.shortest_path(amazon_subgraph, source=source_node,
                                 target=target_node)
dijkstra_total_latency = sum(amazon_subgraph[i][j].get('weight', 1)
                             for i, j in zip(dijkstra_path, dijkstra_path[1:]))

# Visualize the network
plt.figure(figsize=(10, 6))
pos = nx.spring_layout(amazon_subgraph)
nx.draw(amazon_subgraph, pos, with_labels=True, node_size=500,
        node_color='skyblue')
plt.title('Amazon Subgraph Network Topology')
plt.show()

# Total latency comparison: Dijkstra's shortest path vs. the learned RL policy
(dijkstra_total_latency, improved_stable_rl_total_latency)
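The listing above only draws the network topology (Figure 1); the comparison shown in Figure 2 is not produced by the code given. A minimal sketch of how such a bar chart could be generated with matplotlib, reusing the two totals computed above (the plotting itself is our assumption, not part of the original script):

# Sketch (assumption): bar chart comparing the two total latencies
plt.figure(figsize=(6, 4))
plt.bar(['Dijkstra', 'Q-Learning policy'],
        [dijkstra_total_latency, improved_stable_rl_total_latency],
        color=['steelblue', 'orange'])
plt.ylabel('Total path latency')
plt.title("Dijkstra's versus Our Algorithm")
plt.show()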
Output:

Figure 1: Amazon Network Topology

Figure 2: Dijkstra's versus Our Algorithm
