You are on page 1 of 9

UNIVERSITY OF MAURITIUS

FACULTY OF AGRICULTURE
BSc (Hons) Biotechnology

AGRI 2081Y (3) - COMPUTATIONAL BIOLOGY

Name of Student: Marie Natacha Meunier

Student I.D: 1712892

Date: 25th May 2020

Lecturer Name: Dr Shakuntala Baichoo


# Amino-acid sequence of the protein chain, kept as a multi-line string.
# The line breaks are layout only and are not part of the sequence.
chain_a = """SSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKM
FCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVV
RRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFR
HSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILT
IITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKG
EPHHELPPGSTKRALPNNT"""

#Question 1 a
# The last row has no trailing "\n", so counting newline characters would
# under-report the number of lines by one; splitlines() counts the rows.
num_lines = len(chain_a.splitlines())
print(num_lines)

#Question 1 b
# Sequence length = total characters minus the layout-only line breaks.
length = len(chain_a) - chain_a.count("\n")
print("length sequence: ", length)

#Question 1 c
# Same sequence as one continuous string, without line breaks.
new_chain = chain_a.replace("\n", "")
print("New Chain:", new_chain)

#Question 1 d
# Number of cysteine (C) residues.
count = new_chain.count("C")
print("Number of C:", count)

#Question 1 e
# Search the newline-free sequence: searching chain_a would miss a motif
# that spans a line break and would shift the reported position by the
# number of preceding "\n" characters.
motif = "NLRVEYLDDRN"
pos = new_chain.find(motif)  # 0-based index, or -1 when absent
if pos != -1:
    print("yes found")
    print("Starting position :", pos)
else:
    print("not found")
#Question 2

# DNA sequence as pasted; the literal contains line breaks and one stray
# space that are layout artifacts rather than bases.
dna_seq = """GGGCTTGTGGCGCGAGCTTCTGAAACTAGGCGGCAGAGGCGGAGCCGCT
GTGGCACTGCTGCGCCTCTGCTGCGCCTCGGGTGTCTTTT
GCGGCGGTGGGTCGCCGCCGGGAGAAGCGTGAGGGGACAG
ATTTGTGACCGGCGCGGTTTTTGTCAGCTTACTCCGGCCA AAAAAGAACTGCACCTCTGGAGCGG"""

# Remove every whitespace character so that the length, the GC content and
# the slice indices below all refer to bases only.
dna_seq = "".join(dna_seq.split())

#Question 2 a

# Count the number of C's in DNA sequence
no_c = dna_seq.count("C")

# Count the number of G's in DNA sequence
no_g = dna_seq.count("G")

#determine the length of the DNA sequence
dna_length = len(dna_seq)

#compute the GC content
# GC content is the fraction of bases that are G or C (a value in 0..1),
# not the raw G + C count.
gc_cont = (no_g + no_c) / dna_length

#Question 2 b
# Replace every T with U to obtain the RNA form of the sequence.
rna_seq = dna_seq.replace("T", "U")

#Question 2 c
# The intron occupies 0-based indices 50..155; the two exons are the parts
# before and after it, and the spliced sequence is their concatenation.
exon1 = dna_seq[:50]
intron = dna_seq[50:156]
exon2 = dna_seq[156:]
spliced = exon1 + exon2

#Question 3
#Question 3 a

# The regular-expression module is needed by everything in this question.
import re

# Clustering report: each ">Cluster N" header is followed by numbered member
# lines of the form "<index> ><gene name> at <percentage>%".
clusters = """\
>Cluster 0
0 >YLR106C at 100.00%
>Cluster 50
0 >YPL082C at 100.00%
>Cluster 54
0 >YHL009W-A at 90.80%
1 >YHL009W-B at 100.00%
2 >YJL113W at 98.77%
3 >YJL114W at 97.35%
>Cluster 52
0 >YBR208C at 100.00%
"""

# Patterns are compiled once and reused inside the loop below.
cluster_id_re = re.compile(r">Cluster\s+(\d+)", re.IGNORECASE)
gene_name_re = re.compile(r"> ?([A-Za-z0-9-]+)")
percent_re = re.compile(r"at ?([\d.]+)")

#Question a
# Capture only the digits after ">Cluster", so the IDs carry no leading
# space and the final "r" of "Cluster" is not treated as optional.
cluster_ids = cluster_id_re.findall(clusters)
result = cluster_ids

#Question b
# Rewrite the cluster headers so that ">" is left only in front of gene
# names; the gene-name pattern then cannot match the word "Cluster".
r = clusters.replace('>Cluster', 'Test')
result = gene_name_re.findall(r)

# All gene names followed by all percentages, in file order.
per = result + percent_re.findall(clusters)

# Per-line view: [gene name, percentage] for member lines, [] for header
# lines and for the empty string left after the final line break.
lines = r.split('\n')
for line in lines:
    print(gene_name_re.findall(line) + percent_re.findall(line))
#Question 4

# Ratios keyed by a pair of bases; each value is the quotient written out.
ratios = {
    ("A", "T"): 10.0 / 5.0,
    ("A", "C"): 10.0 / 7.0,
    ("A", "G"): 10.0 / 6.0,
    ("T", "C"): 5.0 / 7.0,
    ("T", "G"): 5.0 / 6.0,
    ("C", "G"): 7.0 / 6.0,
}

#Question 4 a

# There is no difference between len(ratios), len(ratios.keys()),
# len(ratios.values()) and len(ratios.items()): each one returns the number
# of key/value entries stored in the dictionary.
print(len(ratios.keys()))
print(len(ratios.values()))
print(len(ratios.items()))

#Question 4 b

# Two-entry dictionary holding the pairs that are looked up below.
ratio = {("A", "T"): 10.0 / 5.0, ("C", "G"): 7.0 / 6.0}

# `in` on a dictionary tests its keys.
if ("A", "T") in ratios:
    print("yes 'A, T' is found in ratios")
else:
    print("No 'A, T' is not found in ratios")

if ("C", "G") in ratios:
    print("yes 'C, G' is found in ratios")
else:
    print("No 'C, G' is not found in ratios")

#Question 4 c

# Membership in .values() compares by equality, so the int 2 matches 2.0.
contains_2 = 2 in ratios.values()
print(contains_2)

contains_3 = 3 in ratios.values()
print(contains_3)

#Question 4 d

# A (key, value) tuple is in .items() only when the key exists and its
# stored value equals the given value.
print((("A", "T"), 2) in ratios.items())
print((("C", "G"), 1000) in ratios.items())

#Question 4 e

# Keys and values as two lists in matching (insertion) order.
keys = list(ratios.keys())
values = list(ratios.values())
#Question 5

#translate the list:


def listToString(items):
    """Return the strings in *items* joined by single spaces."""
    return " ".join(items)


# One-letter base codes to translate. Named `bases` rather than `list` so
# the builtin `list` type is not shadowed for the rest of the script.
bases = ["A", "T", "T", "A", "G", "T", "C"]

# Three-letter name used for each one-letter base code.
translation = {
    "A": "ade",
    "T": "tym",
    "G": "gua",
    "C": "cyt",
}

# Translate base by base instead of hard-coding the expected answer.
translated = [translation[base] for base in bases]

String = listToString(translated)
print(String)
#Question 6

# A Python program to read the file data.fasta

# FASTA-formatted text with six records, chains A-F of entry 2HMI. Each
# record is a ">" header line, a blank line, one sequence line and (except
# the last) another blank line.
# NOTE(review): `text` is not referenced again in the visible part of the
# script; the blank lines may be a copy/paste artifact - confirm before
# parsing this string line by line.
text=""">2HMI:A|PDBID|CHAIN|SEQUENCE

PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKI

>2HMI:B|PDBID|CHAIN|SEQUENCE

PISPIETVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKI

>2HMI:C|PDBID|CHAIN|SEQUENCE

DIQMTQTTSSLSASLGDRVTISCSASQDISSYLNWYQQKPEGTVKLLIYY

>2HMI:D|PDBID|CHAIN|SEQUENCE

QITLKESGPGIVQPSQPFRLTCTFSGFSLSTSGIGVTWIRQPSGKGLEWL

>2HMI:E|PDBID|CHAIN|SEQUENCE

ATGGCGCCCGAACAGGGAC

>2HMI:F|PDBID|CHAIN|SEQUENCE

GTCCCTGTTCGGGCGCCA"""

# Opens 'fasta_file.txt' from the current working directory in the default
# text read mode.
# NOTE(review): the heading for this question mentions data.fasta, not
# fasta_file.txt - confirm which file is intended. No matching close() is
# visible here; a `with open(...) as fastaFile:` block would release the
# handle automatically.
fastaFile = open('fasta_file.txt')

You might also like