Team11 - CH - EN.U4AIE20054 - Assignment3: 0.1 Intelligence of Biological Systems 3 0.2 Assignment 3

Team11_CH.EN.
U4AIE20054_Assignment3
November 15, 2021
0.1 Intelligence of Biological Systems 3

0.2 Assignment 3
0.2.1 Name:Rohith ND
0.2.2 Roll No:20054
0.3 Question 1
[ ]: import time
start = time.time()
seq = "'TGCTGCTG'"
def StringKmerCompo(sequence, kmerlenght):
    """Return every k-mer of ``sequence`` in order of appearance.

    Args:
        sequence: String to split into overlapping substrings.
        kmerlenght: Length ``k`` of each substring (expected to be positive).

    Returns:
        A list with one entry per start position ``0 .. len(sequence) - k``,
        each entry being ``sequence[i:i + k]``. Repeated k-mers are kept.
        The list is empty when ``sequence`` is shorter than ``kmerlenght``.
    """
    window_count = len(sequence) - kmerlenght + 1
    # range() is empty for a non-positive count, so short inputs yield [].
    return [sequence[i:i + kmerlenght] for i in range(window_count)]
# Print each 3-mer of seq on its own line, then the elapsed wall-clock
# time in seconds since `start` was taken.
for kmer in StringKmerCompo(seq, 3):
    print(kmer)
end = time.time()
print(end - start)
'TG
TGC
GCT
CTG
TGC
GCT
CTG
TG'
0.001967906951904297
[ ]: # The divided 3-mers are given as input and the overlapping patterns are returned in the form of an adjacency list
def OverlapGraph(reads):
    """Build the overlap graph of ``reads`` as an adjacency mapping.

    Each distinct read becomes a key. Its value lists, in input order, every
    read whose prefix (all but its last character) equals the key's suffix
    (all but its first character). ``reads`` is scanned in full for every
    key, so a read that occurs several times in ``reads`` appears several
    times in the successor lists.

    Args:
        reads: Sequence of strings; it is iterated more than once.

    Returns:
        A dict mapping each distinct read to its list of successor reads
        (an empty list when nothing overlaps).
    """
    matrix = {read: [] for read in reads}
    for read, successors in matrix.items():
        suffix = read[1:]
        successors.extend(other for other in reads if other[:-1] == suffix)
    return matrix
seq = "GTGCTATGCT"
result = OverlapGraph(StringKmerCompo(seq, 3))
# Print one "read - > successors" line per read that has any successor.
for read, successors in result.items():
    if successors:
        print(read + " - > " + ", ".join(successors))
GTG - > TGC, TGC

TGC - > GCT, GCT
GCT - > CTA
CTA - > TAT
TAT - > ATG
ATG - > TGC, TGC
[ ]: # Finding the Hamiltonian Path with edges as

# GTG -- > 0 TGC -- > 1 GCT -- > 2 CTA -- > 3 TAT --> 4 ATG --> 5
class Graph:
    """Graph over vertices ``0 .. n-1`` stored as an adjacency list."""

    def __init__(self, edges, n, directed=False):
        """Build the adjacency list from a collection of edges.

        Args:
            edges: Iterable of ``(src, dest)`` pairs of vertex indices, each
                in ``range(n)``.
            n: Number of vertices.
            directed: When False (the default, matching the original
                behaviour) every edge is stored in both directions. When
                True only ``src -> dest`` is stored, so edge direction is
                respected.
        """
        # One independent neighbour list per vertex.
        self.adjList = [[] for _ in range(n)]

        for (src, dest) in edges:
            self.adjList[src].append(dest)
            if not directed:
                self.adjList[dest].append(src)
def hamiltonianPaths(graph, v, visited, path, n):
    """Print every Hamiltonian path that extends ``path`` from vertex ``v``.

    Depth-first search with backtracking. ``path`` and ``visited`` are
    mutated during the search and restored to their incoming state before
    the function returns.

    Args:
        graph: Object exposing ``adjList``, a list of neighbour lists.
        v: Vertex at the current end of ``path``.
        visited: List of booleans, ``visited[u]`` is True when ``u`` is
            already on ``path``.
        path: List of vertices visited so far, in order.
        n: Total number of vertices; a path of this length is complete.
    """
    if len(path) == n:
        # Every vertex is on the path exactly once.
        print(path)
        return
    for w in graph.adjList[v]:
        if visited[w]:
            continue
        visited[w] = True
        path.append(w)
        hamiltonianPaths(graph, w, visited, path, n)
        # Backtrack so the next neighbour starts from the same state.
        visited[w] = False
        path.pop()
def findHamiltonianPaths(graph, n):
    """Print all Hamiltonian paths of ``graph``, trying every start vertex.

    Args:
        graph: Object exposing ``adjList``, a list of neighbour lists.
        n: Number of vertices; start vertices are ``0 .. n-1``.
    """
    for start in range(n):
        # Fresh search state for each start vertex.
        path = [start]
        visited = [False] * n
        visited[start] = True
        hamiltonianPaths(graph, start, visited, path, n)
if __name__ == '__main__':
    # Edges between the six numbered vertices; print every Hamiltonian path
    # of the graph built from them.
    edges = [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 1)]
    n = 6
    graph = Graph(edges, n)
    findHamiltonianPaths(graph, n)
[0, 1, 2, 3, 4, 5]
[0, 1, 5, 4, 3, 2]
[2, 3, 4, 5, 1, 0]
[5, 4, 3, 2, 1, 0]
# In[ ]:
# Hamiltonian paths printed by the previous cell, as vertex indices.
string = (
    '[0, 1, 2, 3, 4, 5],[0, 1, 5, 4, 3, 2],'
    '[2, 3, 4, 5, 1, 0],[5, 4, 3, 2, 1, 0]'
)
# GTG -- > 0 TGC -- > 1 GCT -- > 2 CTA -- > 3 TAT --> 4 ATG --> 5
dct = {'0': 'GTG', '1': 'TGC', '2': 'GCT', '3': 'CTA', '4': 'TAT', '5': 'ATG'}
# Replace every digit character with its 3-mer and copy everything else
# (brackets, commas, spaces) unchanged. Works per character, so it relies
# on all vertex indices being single digits.
newstr = ''.join(dct[ch] if ch.isdigit() else ch for ch in string)
print(newstr)
[GTG, TGC, GCT, CTA, TAT, ATG],[GTG, TGC, ATG, TAT, CTA, GCT],[GCT, CTA, TAT,
ATG, TGC, GTG],[ATG, TAT, CTA, GCT, TGC, GTG]
[ ]: # Genome is represented as a path with edges (rather than nodes)

compo,i =[],0
i+=1
return compo
def PathGraph(text,k):
    """Represent ``text`` as a path whose edges are its successive k-mers.

    Args:
        text: String to decompose.
        k: Length of each k-mer (edge label).

    Returns:
        A list with one ``[prefix, suffix]`` pair per k-mer of ``text``, in
        order, where ``prefix`` is the k-mer without its last character and
        ``suffix`` is the k-mer without its first character.
    """
    return [[kmer[:-1], kmer[1:]] for kmer in StringKmerCompo(text, k)]
print (PathGraph("'TGCTGCTG'", 3))
[["'T", 'TG'], ['TG', 'GC'], ['GC', 'CT'], ['CT', 'TG'], ['TG', 'GC'], ['GC',
'CT'], ['CT', 'TG'], ['TG', "G'"]]
# In[ ]:
def StringKmerCompo(sequence, kmerlenght):
    """Return every k-mer of ``sequence`` in order of appearance.

    Args:
        sequence: String to split into overlapping substrings.
        kmerlenght: Length ``k`` of each substring (expected to be positive).

    Returns:
        A list with one entry per start position ``0 .. len(sequence) - k``,
        each entry being ``sequence[i:i + k]``. Repeated k-mers are kept.
        The list is empty when ``sequence`` is shorter than ``kmerlenght``.
    """
    window_count = len(sequence) - kmerlenght + 1
    # range() is empty for a non-positive count, so short inputs yield [].
    return [sequence[i:i + kmerlenght] for i in range(window_count)]
def DeBruijnGraph(seq,k):
    """Build the de Bruijn graph of ``seq`` from its k-mer path.

    Edges of the path that start at the same node are merged ("glued") into
    a single entry.

    Args:
        seq: String to decompose.
        k: Length of each k-mer (edge).

    Returns:
        A list with one entry per distinct start node, in order of first
        appearance. Each entry is ``[node, successor, successor, ...]``,
        where the successors are the end nodes of every path edge that
        starts at ``node``, in path order (repeats are kept).
    """
    successors = {}
    # Dicts keep insertion order, so nodes stay in first-appearance order.
    for node, target in PathGraph(seq, k):
        successors.setdefault(node, []).append(target)
    return [[node, *targets] for node, targets in successors.items()]
# Print one "node -> successors" line per node of the de Bruijn graph.
# NOTE(review): the sequence literal contains the apostrophe characters
# themselves, so they are processed as symbols of the text. Confirm
# whether plain "TGCTGCTG" was intended.
for e in DeBruijnGraph("'TGCTGCTG'",3):
    print (e[0] +" -> "+ str(e[1:])[1:-1].replace("'",""))
'T -> TG
TG -> GC, GC, "G"
GC -> CT, CT
CT -> TG, TG
[ ]: # Reconstructing the Genome path again
def GenomePath(reads):
    """Reconstruct a string from reads ordered along a genome path.

    The result is the first read followed by the last character of every
    subsequent read.

    Args:
        reads: Sequence of strings, consecutive ones assumed to overlap in
            all but one character (the overlap itself is not checked).

    Returns:
        The spelled-out string, or ``""`` when ``reads`` is empty.
    """
    if not reads:
        # Nothing to spell; indexing reads[0] would raise IndexError.
        return ""
    # read[-1:] (not read[-1]) so an empty read contributes nothing.
    return reads[0] + "".join(read[-1:] for read in reads[1:])
# Read one read per line and close the file once it has been consumed.
with open("Q1.txt", 'r') as DNAstring_open:
    reads = DNAstring_open.readlines()
# Strip only the line terminator: slicing off the last character would
# truncate a final line that has no trailing newline.
reads2 = [e.rstrip("\n") for e in reads]
print(reads2)
print (GenomePath(reads2))
['TG', 'TGC', 'GCT', 'CTG', 'TGC', 'GCT', 'CTG', 'T']

TGCTGCTGT
5
0.4 Question 2
[ ]: seq =␣
,→"GAGCATAATAAGGCGAAAGAGGCAGAACTGCTGCACGACAGTAAAGAGGTGCTTGAGCATATCCTTTCTGTCAAAGAAGCTATTGCTGAAC
print(seq[49:60])
print("The length of sequence is")
print(len(seq))
TGCTTGAGCAT
The length of sequence is
201
[ ]: # Dividing the given sequence into k mers where k = 3

# The input given is the genome sequence and the output is the divided k-mers in stacked order
import time
start = time.time()
seq = "TGCTGCTG"
compo,i =[],0
i+=1
return compo
print (e)
end = time.time()
print(end - start)
TGC
GCT
CTG
TGC
GCT
CTG
0.0003008842468261719
[ ]: import time
start = time.time()
def OverlapGraph(reads):
    """Build the overlap graph of ``reads`` as an adjacency mapping.

    Each distinct read becomes a key. Its value lists, in input order, every
    read whose prefix (all but its last character) equals the key's suffix
    (all but its first character). ``reads`` is scanned in full for every
    key, so a read that occurs several times in ``reads`` appears several
    times in the successor lists.

    Args:
        reads: Sequence of strings; it is iterated more than once.

    Returns:
        A dict mapping each distinct read to its list of successor reads
        (an empty list when nothing overlaps).
    """
    matrix = {read: [] for read in reads}
    for read, successors in matrix.items():
        suffix = read[1:]
        successors.extend(other for other in reads if other[:-1] == suffix)
    return matrix
seq = "GCGGGGCC"
for e in result:
if result[e]:
end = time.time()
print(end - start)
Time = end - start
GCG - > CGG

CGG - > GGG, GGG, GGC
GGG - > GGG, GGG, GGC
GGC - > GCG, GCC
0.0004038810729980469
pathEdge = []
pathGraph = []
for e in pathEdge:
return pathGraph
print (PathGraph("TGCTGCTG", 3))
[['TG', 'GC'], ['GC', 'CT'], ['CT', 'TG'], ['TG', 'GC'], ['GC', 'CT'], ['CT',
'TG']]
# In[ ]:
def DeBruijnGraph(seq,k):
    """Build the de Bruijn graph of ``seq`` from its k-mer path.

    Edges of the path that start at the same node are merged ("glued") into
    a single entry.

    Args:
        seq: String to decompose.
        k: Length of each k-mer (edge).

    Returns:
        A list with one entry per distinct start node, in order of first
        appearance. Each entry is ``[node, successor, successor, ...]``,
        where the successors are the end nodes of every path edge that
        starts at ``node``, in path order (repeats are kept).
    """
    successors = {}
    # Dicts keep insertion order, so nodes stay in first-appearance order.
    for node, target in PathGraph(seq, k):
        successors.setdefault(node, []).append(target)
    return [[node, *targets] for node, targets in successors.items()]
# Print one "node -> successors" line per node of the de Bruijn graph.
for e in DeBruijnGraph("TGCTGCTG", 3):
    print(e[0] + " -> " + ", ".join(e[1:]))
TG -> GC, GC
GC -> CT, CT
CT -> TG, TG
[ ]: # Reconstructing the Genome path again
def GenomePath(reads):
    """Reconstruct a string from reads ordered along a genome path.

    The result is the first read followed by the last character of every
    subsequent read.

    Args:
        reads: Sequence of strings, consecutive ones assumed to overlap in
            all but one character (the overlap itself is not checked).

    Returns:
        The spelled-out string, or ``""`` when ``reads`` is empty.
    """
    if not reads:
        # Nothing to spell; indexing reads[0] would raise IndexError.
        return ""
    # read[-1:] (not read[-1]) so an empty read contributes nothing.
    return reads[0] + "".join(read[-1:] for read in reads[1:])
# Read one read per line and close the file once it has been consumed.
with open("Q2.txt", 'r') as DNAstring_open:
    reads = DNAstring_open.readlines()
# Strip only the line terminator: slicing off the last character would
# truncate a final line that has no trailing newline.
reads2 = [e.rstrip("\n") for e in reads]
print(reads2)
print (GenomePath(reads2))
['TGC', 'GCT', 'CTG', 'TGC', 'GCT', 'CT']

TGCTGCTT
[ ]: # Dividing the given sequence into k mers where k = 5

# The input given is the genome sequence and the output is the divided k-mers in stacked order
8
import time
start = time.time()
seq = "TGCTGCTG"
compo,i =[],0
i+=1
return compo
print (e)
end = time.time()
print(end - start)
TGCTG
GCTGC
CTGCT
TGCTG
0.0007865428924560547
# In[ ]:
def OverlapGraph(reads):
    """Build the overlap graph of ``reads`` as an adjacency mapping.

    Each distinct read becomes a key. Its value lists, in input order, every
    read whose prefix (all but its last character) equals the key's suffix
    (all but its first character). ``reads`` is scanned in full for every
    key, so a read that occurs several times in ``reads`` appears several
    times in the successor lists.

    Args:
        reads: Sequence of strings; it is iterated more than once.

    Returns:
        A dict mapping each distinct read to its list of successor reads
        (an empty list when nothing overlaps).
    """
    matrix = {read: [] for read in reads}
    for read, successors in matrix.items():
        suffix = read[1:]
        successors.extend(other for other in reads if other[:-1] == suffix)
    return matrix
seq = "TGCTGCTG"
for e in result:
if result[e]:
TGCTG - > GCTGC

GCTGC - > CTGCT
CTGCT - > TGCTG, TGCTG
9
pathEdge = []
pathGraph = []
for e in pathEdge:
return pathGraph
print (PathGraph("TGCTGCTG", 5))
[['TGCT', 'GCTG'], ['GCTG', 'CTGC'], ['CTGC', 'TGCT'], ['TGCT', 'GCTG']]
[ ]:
10

Team11 - CH - EN.U4AIE20054 - Assignment3: 0.1 Intelligence of Biological Systems 3 0.2 Assignment 3

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Team11 - CH - EN.U4AIE20054 - Assignment3: 0.1 Intelligence of Biological Systems 3 0.2 Assignment 3

Uploaded by

Copyright:

Available Formats

Team11_CH.EN.

November 15, 2021

0.1 Intelligence of Biological Systems 3

GTG - > TGC, TGC

[ ]: # Finding the Hamitonian Path with edges as

# A list of lists to represent an adjacency list

# add edges to the undirected graph

def hamiltonianPaths(graph, v, visited, path, n):

hamiltonianPaths(graph, w, visited, path, n)

def findHamiltonianPaths(graph, n):

for start in range(n):

hamiltonianPaths(graph, start, visited, path, n)

[ ]: string='[0, 1, 2, 3, 4, 5],[0, 1, 5, 4, 3, 2],[2, 3, 4, 5, 1, 0],[5, 4, 3, 2,␣

[ ]: # Genome is represented as a path with edges (rather than nodes)

def StringKmerCompo(sequence, kmerlenght):

print (PathGraph("'TGCTGCTG'", 3))

[ ]: def StringKmerCompo(sequence, kmerlenght):

# Just remove duplicate edges

[ ]: # Reconstructing the Genome path again

DNAstring_open = open("Q1.txt", 'r')

['TG', 'TGC', 'GCT', 'CTG', 'TGC', 'GCT', 'CTG', 'T']

[ ]: # Dividing the given sequence into k mers where k = 3

GCG - > CGG

[ ]: # Genome is represented as a path with edges (rather than nodes)

print (PathGraph("TGCTGCTG", 3))

[ ]: # Reconstructing the Genome path again

DNAstring_open = open("Q2.txt", 'r')

['TGC', 'GCT', 'CTG', 'TGC', 'GCT', 'CT']

[ ]: # Dividing the given sequence into k mers where k = 5

TGCTG - > GCTGC

[ ]: # Genome is represented as a path with edges (rather than nodes)

print (PathGraph("TGCTGCTG", 5))

[['TGCT', 'GCTG'], ['GCTG', 'CTGC'], ['CTGC', 'TGCT'], ['TGCT', 'GCTG']]

You might also like