# p3 Python Project

"""This program asks the user to enter a number of DNA sequences and finds the
consensus sequence. The output is the consensus.

Add the corresponding code to accomplish the requested tasks.
"""
##### ADD YOUR NAME, Student ID, and Section number #######
# NAME: DANIELLA VARGAS FIGUEROA
# STUDENT ID:802228453
# SECTION:096
###########################################################
# The function load_data takes an argument, reads the DNA sequences, saves them
# in a list and returns the list.
# a: is the number of sequences to be input
# Auxiliary functions
def valid_seq(seq):
    """Return True when *seq* is a non-empty run of the letters A, C, G, T.

    seq: the candidate DNA sequence, normally a string.

    Returns a bool. An empty sequence is invalid, and any other character
    (including lower-case letters) makes the whole sequence invalid.
    """
    # all() is vacuously True for an empty sequence, so the length check is
    # what keeps the empty case invalid.
    return len(seq) > 0 and all(base in ("A", "C", "G", "T") for base in seq)
# max_nuc() takes four inputs, the frequency of each nucleotide in a column, and
# returns a list of two elements containing the most frequent nucleotide
# and its frequency in the column
def max_nuc(freq_a, freq_g, freq_c, freq_t):
    """Return the most frequent nucleotide of a column and its count.

    freq_a, freq_g, freq_c, freq_t: number of times A, G, C and T occur
    in the column.

    Returns a two-element list [nucleotide, frequency].

    When several nucleotides share the highest count, the first one in the
    order A, G, C, T is returned. Strict comparisons would return nothing
    at all on a tie, which breaks callers that index the result.
    """
    candidates = [
        ["A", freq_a],
        ["G", freq_g],
        ["C", freq_c],
        ["T", freq_t],
    ]
    # max() keeps the first maximal item, which gives the A, G, C, T
    # tie-breaking order described above.
    return max(candidates, key=lambda pair: pair[1])
#########################
# load_data() takes two inputs, the file name and the option, and returns one tuple
# (first the list of sequences, then the option: consensus or transcription)
def load_data(filename, option):
    """Read the DNA sequences stored one per line in *filename*.

    filename: path of the text file to read.
    option: the menu choice made by the user; it is passed through
        unchanged.

    Returns the tuple (sequences, option), where sequences is a list of
    strings. A line is kept only when valid_seq() accepts it and its length
    equals the length of the first accepted line; other lines are skipped.
    """
    sequences = []
    expected_length = None
    # The with-block closes the file even if reading fails part-way.
    with open(filename, "r") as infile:
        for line in infile:
            # Drop the line terminator, including a Windows "\r\n", so the
            # stray "\r" does not make every sequence look invalid.
            seq = line.rstrip("\r\n")
            if not valid_seq(seq):
                continue
            if expected_length is None:
                # The first valid sequence fixes the length for the rest.
                expected_length = len(seq)
            if len(seq) == expected_length:
                sequences.append(seq)
    return (sequences, option)
# The function count_nucl_freq takes as argument the list from load_data and returns
# the frequencies of the nucleotides for each column
# a: is a list of DNA sequences
def count_nucl_freq(a):
    """Find the most frequent nucleotide in every column of the sequences.

    a: list of DNA sequences; every sequence is indexed up to the length
        of the first one.

    Returns a list with one max_nuc() result per column, i.e. one
    [nucleotide, frequency] pair per column. An empty input list gives an
    empty result.
    """
    frequencies = []
    if not a:
        # Without sequences there are no columns to inspect.
        return frequencies
    for i in range(len(a[0])):
        # Counters in the order max_nuc() expects: A, G, C, T.
        column_freq = [0, 0, 0, 0]
        for seq in a:
            let = seq[i]
            if let == "A":
                column_freq[0] += 1
            elif let == "G":
                column_freq[1] += 1
            elif let == "C":
                column_freq[2] += 1
            else:
                # Any remaining letter lands in the T counter.
                column_freq[3] += 1
        # Keep only the winning nucleotide of this column.
        frequencies.append(
            max_nuc(column_freq[0], column_freq[1], column_freq[2], column_freq[3]))
    return frequencies
# analyze the list by columns
# find nucleotide frequencies
# you will decide what data type, from the ones already explained, works best for
# your implementation
# return frequencies
# The function find_consensus takes as argument the result of count_nucl_freq and
# returns a consensus sequence
# a: is the list you return in count_nucl_freq
def find_consensus(a):
    """Build the consensus string and write it to answer.txt.

    a: list of [nucleotide, frequency] pairs, one per column; only the
        nucleotide (index 0) of each pair is used.

    Returns the consensus string. The same string is written, without a
    trailing newline, to the file "answer.txt", which is overwritten.
    """
    consensus = "".join(element[0] for element in a)
    # The with-block guarantees the answer is flushed and the file closed.
    with open("answer.txt", "w") as outfile:
        outfile.write(consensus)
    return consensus
# The function convert_seq takes one argument: the DNA sequence

def convert_seq(a):
    """Convert one DNA sequence to its RNA counterpart.

    a: the DNA sequence, iterated letter by letter.

    Returns a string in which A becomes U, T becomes A, C becomes G and
    G becomes C. Any other character is left out of the result.
    """
    rna_for = {"A": "U", "T": "A", "C": "G", "G": "C"}
    # .get(..., "") drops letters that have no mapping.
    return "".join(rna_for.get(let, "") for let in a)
# convert DNA to RNA sequences

# return RNA sequences
# The function transcript_seq takes one argument: the list of sequences

def transcript_seq(a):
    """Convert every DNA sequence to RNA and write the results to answer.txt.

    a: list of DNA sequences.

    Returns the list of converted sequences, in input order. Each one is
    also written on its own line to the file "answer.txt", which is
    overwritten.
    """
    rnaseq = [convert_seq(seq) for seq in a]
    # The with-block guarantees the output is flushed and the file closed.
    with open("answer.txt", "w") as outfile:
        for rna in rnaseq:
            outfile.write(rna + "\n")
    return rnaseq
# Read list of DNA sequences

# return list of RNA sequences
# The function main starts your program, makes the function calls and writes a new file
# with the consensus or transcription
def main():
    """Ask for a file name and an option, then run the chosen task.

    Option 1 computes the consensus sequence and option 2 transcribes the
    sequences to RNA; both tasks write their result to answer.txt.
    The function does not return anything.
    """
    filename = input("Write the name of the file: ")
    print('Select option:')
    print('1. Consensus Sequences')
    print('2. Transcriptions Sequences')
    # Keep asking until the answer is 1 or 2. Text that is not a number is
    # treated as a wrong answer instead of stopping the program.
    option = None
    while option not in (1, 2):
        try:
            option = int(input(""))
        except ValueError:
            option = None
        if option not in (1, 2):
            print("Incorrect input. Only enter 1 or 2.")
    data = load_data(filename, option)
    # Make the function calls that match the option the user entered.
    if data[1] == 1:
        freq = count_nucl_freq(data[0])
        find_consensus(freq)
    elif data[1] == 2:
        transcript_seq(data[0])
# ask the number of DNA sequences

# contains the function calls
# function doesn't return anything
# Run the program only when this file is executed directly, not when it is
# imported.
if __name__ == "__main__":
    main()

p3 Python Project

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

p3 Python Project

Uploaded by

Copyright:

Available Formats

"""This program ask the user to enter a number of DNA sequences and finds the

consensus sequence. The ouput is the consensus.

# The function count_nucl_freq, it take arguments the load_data, contains the

# The function find_consensus, it take arguments the count_nucl_freq and return a

# function convert_seqn it take one argument the dna sequences

# convert dna to rna sequences

#function transcript_seq, it take one argument the list of sequences

# Read list DNA sequences

#ask the number DNA sequence

You might also like