You are on page 1 of 13

19BIO201

September 16, 2021

1 Assignment 1
1.1 Name:Rohith ND
1.2 Roll No:20054
1. Translate the given DNA sequence in all 6 frames of translation. Find ORF region if present
and highlight the longest ORF possible in the given example. DO NOT use any online tools.
Use any programming language for coding.
2. For the double-stranded sequence (obtained by pairing the given sequence with its
complementary sequence), find the ratio of AT:GC.
Note: The given sequence is in FASTA format.
sequence 5’ to 3’ TTTAGGACTTGACAGTTTCGGTCCGGAATTCCCGGGATC-
TACTACAAACAAATTACATTCAATATGCTTT TCTCAAGAGACTTTATTAT-
GCTTTTGGCATTCGCCGCCGTTGGCGCTGTCGCTGCACCTGTTGCAGAGCC
TCAACCAGAAGCCAAAGCTGATGCTTCTCCAGCGGACTATGGCGACTACG-
GTGACTATGGCGATTACGGA AACTATGGAACATACGAGAAATACCCATC-
CTCTTCCTCCGTCACACCTACACCAACACCAACACCAACTG GATATG-
GTAGCTATGCCAGCTACGGTACCTACAAGAACAAGAAAGTTAGAGAAGC-
CGCCCCAGAGCCTGC TCCAGTAGCAAAGCGTGATGATTATGGAAAC-
TATGGAGACTATGGTGACTACGGAAACTACGGCAACTAC GAGAACTAC-
CCTTCCGCTACCTCTTCTGCTACCTCTTCTGCCACCCCTTCAAGCTC-
CTCGTCAAGCTCTT CATCAGCTACACCAACACCAACTCCAACCGGT-
TACGGTTCATACGCAAGCTACGGAGCATACAAGAACAA GAAAGCACGT-
GCTCCAGAAGCCAACCCAGAAGACTATGGAAACTACGGTGACTACGGT-
GATTACGGCAAC TACGAGAACTACACCCCTGCCTCCACCTCCTCCA-
CATCCACATCCACCGTCACCCCAACTCCCACTCCCT ACCCAACGGC-
TACACAACTACGGCTCCTANGTACTACGACACTACAGCGCGAAGCGAT-
ACGTTCAGCTTA GCGATGACTATTGACAGTTTATTGTATACACACTCTTA-
GACGATGAATCGCCTTTAATATATGATATAAT GATCAATGTTGACTGCCTCG-
CACTGATTGGAGAATGGATAAAAG

# Purine and pyrimidine bases, ordered so that Pu[i] base-pairs with Py[i]
# (A with T, G with C).
Pu = ("A", "G")
Py = ("T", "C")

def Comp(seq):
    """Return the complement of the DNA string *seq*, base by base.

    A is swapped with T and G with C. The result is in the same order as
    the input (it is not reversed).

    Any character other than upper-case A, T, G or C is omitted from the
    result, so the returned string can be shorter than the input.
    """
    pairs = {"A": "T", "T": "A", "G": "C", "C": "G"}
    # dict.get(..., "") drops unrecognised characters instead of raising.
    return "".join(pairs.get(base, "") for base in seq)

def Frames(seq):
    """Return the six reading frames of *seq* as a list of six strings.

    Indices 0-2 are the given strand read from offsets 0, 1 and 2.
    Indices 3-5 are the reverse complement (``Comp(seq)`` reversed) read
    from offsets 0, 1 and 2. Every frame is cut at the end so that its
    length is a multiple of three, i.e. it contains only whole codons.
    """
    def whole_codons(strand, offset):
        # Drop the first `offset` bases, then the 0-2 leftover bases at the end.
        frame = strand[offset:]
        return frame[:len(frame) - len(frame) % 3]

    reverse_complement = Comp(seq)[::-1]
    forward = [whole_codons(seq, offset) for offset in range(3)]
    backward = [whole_codons(reverse_complement, offset) for offset in range(3)]
    return forward + backward

def Translation(seq):
    """Translate all six reading frames of *seq* into amino-acid strings.

    Returns a tuple ``(Protein, tran)`` of two six-element lists, ordered
    like the frames returned by ``Frames(seq)``:

    * ``Protein[k]`` - one-letter codes, each followed by a single space,
      with ``*`` for a stop codon (e.g. ``"M L * "``).
    * ``tran[k]`` - the same residues as three-letter codes, each followed
      by a single space, with ``*`` kept for a stop codon.

    Raises ``KeyError`` if a frame contains a codon that is not made of
    upper-case A, C, G and T only.
    """
    table = {
        'ATA': 'I', 'ATC': 'I', 'ATT': 'I', 'ATG': 'M',
        'ACA': 'T', 'ACC': 'T', 'ACG': 'T', 'ACT': 'T',
        'AAC': 'N', 'AAT': 'N', 'AAA': 'K', 'AAG': 'K',
        'AGC': 'S', 'AGT': 'S', 'AGA': 'R', 'AGG': 'R',
        'CTA': 'L', 'CTC': 'L', 'CTG': 'L', 'CTT': 'L',
        'CCA': 'P', 'CCC': 'P', 'CCG': 'P', 'CCT': 'P',
        'CAC': 'H', 'CAT': 'H', 'CAA': 'Q', 'CAG': 'Q',
        'CGA': 'R', 'CGC': 'R', 'CGG': 'R', 'CGT': 'R',
        'GTA': 'V', 'GTC': 'V', 'GTG': 'V', 'GTT': 'V',
        'GCA': 'A', 'GCC': 'A', 'GCG': 'A', 'GCT': 'A',
        'GAC': 'D', 'GAT': 'D', 'GAA': 'E', 'GAG': 'E',
        'GGA': 'G', 'GGC': 'G', 'GGG': 'G', 'GGT': 'G',
        'TCA': 'S', 'TCC': 'S', 'TCG': 'S', 'TCT': 'S',
        'TTC': 'F', 'TTT': 'F', 'TTA': 'L', 'TTG': 'L',
        'TAC': 'Y', 'TAT': 'Y', 'TAA': '*', 'TAG': '*',
        'TGC': 'C', 'TGT': 'C', 'TGA': '*', 'TGG': 'W',
    }
    # One-letter -> three-letter amino-acid codes. Leucine is 'Leu'
    # (the earlier spelling 'Lue' was a typo). '*' is handled separately.
    aa = {
        'A': 'Ala', 'C': 'Cys', 'D': 'Asp', 'E': 'Glu',
        'F': 'Phe', 'G': 'Gly', 'H': 'His', 'I': 'Ile',
        'K': 'Lys', 'L': 'Leu', 'M': 'Met', 'N': 'Asn',
        'P': 'Pro', 'Q': 'Gln', 'R': 'Arg', 'S': 'Ser',
        'T': 'Thr', 'V': 'Val', 'W': 'Trp', 'Y': 'Tyr',
    }

    Protein = []
    tran = []
    for frame in Frames(seq):
        # Frames() only returns whole codons, so every slice has three bases.
        residues = [table[frame[i:i + 3]] for i in range(0, len(frame), 3)]
        Protein.append("".join(residue + " " for residue in residues))
        tran.append("".join(
            ("*" if residue == "*" else aa[residue]) + " "
            for residue in residues
        ))
    return Protein, tran

def ORF(seq):
    """Find the open reading frames in each of the six frames of *seq*.

    Returns a dict with keys ``'Frame 1'`` .. ``'Frame 6'`` (same order as
    ``Frames(seq)``). Each value is a list of ORF strings in the order their
    start codons occur. Every ``ATG`` codon in the frame starts an ORF, so
    ORFs sharing a stop codon overlap and all of them are listed.

    * An ORF runs from ``ATG`` up to and including the next in-frame stop
      codon (``TAA``, ``TAG`` or ``TGA``).
    * If no stop codon follows the last ``ATG``, that ORF runs to the end of
      the frame (whole codons only) and the list ends there.
    * Otherwise the list ends with an empty string ``''``, which marks that
      the scan reached the end of the frame without finding another ``ATG``.
    * A frame with no codons at all yields an empty list.
    """
    start_codon = "ATG"
    stop_codons = ("TAA", "TAG", "TGA")

    def ORFs(frame):
        # Collect the ORFs of a single reading frame (see ORF's docstring).
        found = []
        scan_from = 0
        while scan_from < len(frame):
            start = stop = None
            for pos in range(scan_from, len(frame), 3):
                codon = frame[pos:pos + 3]
                if start is None:
                    if codon == start_codon:
                        start = pos
                elif codon in stop_codons:
                    stop = pos + 3  # include the stop codon in the ORF
                    break

            if start is None:
                # No further start codon: record the sentinel and finish.
                found.append("")
                break
            if stop is None:
                # Open-ended ORF: keep whole codons up to the end and finish.
                end = len(frame) - len(frame[start:]) % 3
                found.append(frame[start:end])
                break
            found.append(frame[start:stop])
            # Resume just after this start codon so nested ATGs are found too.
            scan_from = start + 3
        return found

    return {
        "Frame " + str(number): ORFs(frame)
        for number, frame in enumerate(Frames(seq), start=1)
    }

def LongORF(seq):
    """Return the longest ORF of *seq* over all six reading frames.

    Returns a tuple ``(label, orf)``. ``label`` has the form
    ``"Number <k> Translational ORF in Frame <n>"``, where ``k`` is the
    1-based position of the ORF in the list ``ORF(seq)["Frame <n>"]``.
    On ties the first ORF encountered wins (Frame 1 first, then list order).

    If no frame contains an ORF, ``("No ORF found", "")`` is returned.
    """
    orf = ORF(seq)
    best = ""
    best_frame = None
    best_position = 0
    for number in range(1, 7):
        frame = "Frame " + str(number)
        for position, candidate in enumerate(orf[frame], start=1):
            # Strict '>' keeps the earliest of equally long candidates and
            # never selects the '' end-of-scan sentinel.
            if len(candidate) > len(best):
                best, best_frame, best_position = candidate, frame, position

    if best_frame is None:
        return "No ORF found", ""
    index = "Number " + str(best_position) + " Translational ORF in " + best_frame
    return index, best

def Ratio(seq):
    """Return the AT : GC base count of the double strand built from *seq*.

    The counts are doubled because every A or T on the given strand is paired
    with a T or A on the complementary strand (likewise for G and C).

    The result is a string ``"<AT count> : <GC count>"``; the two numbers are
    not reduced to lowest terms. Only upper-case bases are counted.
    """
    at_count = 2 * (seq.count("A") + seq.count("T"))
    gc_count = 2 * (seq.count("G") + seq.count("C"))
    return str(at_count) + " : " + str(gc_count)

# Input DNA sequence, written 5' to 3' (884 bases). Adjacent string literals
# inside the parentheses are concatenated into a single string.
seq = (
    "TTTAGGACTTGACAGTTTCGGTCCGGAATTCCCGGGATCTACT"
    "ACAAACAAATTACATTCAATATGCTTTTCTCAAGAGACTTTATT"
    "ATGCTTTTGGCATTCGCCGCCGTTGGCGCTGTCGCTGCACCTGTTG"
    "CAGAGCCTCAACCAGAAGCCAAAGCTGATGCTTCTCCAGCGGACTAT"
    "GGCGACTACGGTGACTATGGCGATTACGGAAACTATGGAACATACGAGAA"
    "ATACCCATCCTCTTCCTCCGTCACACCTACACCAACACCAACACCAACTGGAT"
    "ATGGTAGCTATGCCAGCTACGGTACCTACAAGAACAAGAAAGTTAGAG"
    "AAGCCGCCCCAGAGCCTGCTCCAGTAGCAAAGCGTGATGATTATGGAAACTAT"
    "GGAGACTATGGTGACTACGGAAACTACGGCAACTACGAGAACTACCCTTCCG"
    "CTACCTCTTCTGCTACCTCTTCTGCCACCCCTTCAAGCTCCTCGTCAAGCTCT"
    "TCATCAGCTACACCAACACCAACTCCAACCGGTTACGGTTCATACGCAAGCTACGG"
    "AGCATACAAGAACAAGAAAGCACGTGCTCCAGAAGCCAACCCAGAAGACTATGGAA"
    "ACTACGGTGACTACGGTGATTACGGCAACTACGAGAACTACACCCCTG"
    "CCTCCACCTCCTCCACATCCACATCCACCGTCACCCCAACTCCCACTCCCTAC"
    "CCAACGGCTACACAACTACGGCTCCTAAGTACTACGACACTACAGCGCGAAGC"
    "GATACGTTCAGCTTAGCGATGACTATTGACAGTTTATTGTATACACACTCTTAGAC"
    "GATGAATCGCCTTTAATATATGATATAATGATCAATGTTGACTGCCTCGCACTGAT"
    "TGGAGAATGGATAAAAG"
)

# Print the six reading frames of the input sequence, each under a
# "Frame <n>" heading, followed by one blank line.
f = Frames(seq)
for number, frame in enumerate(f, start=1):
    print("Frame", number)
    print(frame)
print()

Frame 1
TTTAGGACTTGACAGTTTCGGTCCGGAATTCCCGGGATCTACTACAAACAAATTACATTCAATATGCTTTTCTCAAGAGA
CTTTATTATGCTTTTGGCATTCGCCGCCGTTGGCGCTGTCGCTGCACCTGTTGCAGAGCCTCAACCAGAAGCCAAAGCTG
ATGCTTCTCCAGCGGACTATGGCGACTACGGTGACTATGGCGATTACGGAAACTATGGAACATACGAGAAATACCCATCC
TCTTCCTCCGTCACACCTACACCAACACCAACACCAACTGGATATGGTAGCTATGCCAGCTACGGTACCTACAAGAACAA
GAAAGTTAGAGAAGCCGCCCCAGAGCCTGCTCCAGTAGCAAAGCGTGATGATTATGGAAACTATGGAGACTATGGTGACT
ACGGAAACTACGGCAACTACGAGAACTACCCTTCCGCTACCTCTTCTGCTACCTCTTCTGCCACCCCTTCAAGCTCCTCG
TCAAGCTCTTCATCAGCTACACCAACACCAACTCCAACCGGTTACGGTTCATACGCAAGCTACGGAGCATACAAGAACAA

5
GAAAGCACGTGCTCCAGAAGCCAACCCAGAAGACTATGGAAACTACGGTGACTACGGTGATTACGGCAACTACGAGAACT
ACACCCCTGCCTCCACCTCCTCCACATCCACATCCACCGTCACCCCAACTCCCACTCCCTACCCAACGGCTACACAACTA
CGGCTCCTAAGTACTACGACACTACAGCGCGAAGCGATACGTTCAGCTTAGCGATGACTATTGACAGTTTATTGTATACA
CACTCTTAGACGATGAATCGCCTTTAATATATGATATAATGATCAATGTTGACTGCCTCGCACTGATTGGAGAATGGATA
AA
Frame 2
TTAGGACTTGACAGTTTCGGTCCGGAATTCCCGGGATCTACTACAAACAAATTACATTCAATATGCTTTTCTCAAGAGAC
TTTATTATGCTTTTGGCATTCGCCGCCGTTGGCGCTGTCGCTGCACCTGTTGCAGAGCCTCAACCAGAAGCCAAAGCTGA
TGCTTCTCCAGCGGACTATGGCGACTACGGTGACTATGGCGATTACGGAAACTATGGAACATACGAGAAATACCCATCCT
CTTCCTCCGTCACACCTACACCAACACCAACACCAACTGGATATGGTAGCTATGCCAGCTACGGTACCTACAAGAACAAG
AAAGTTAGAGAAGCCGCCCCAGAGCCTGCTCCAGTAGCAAAGCGTGATGATTATGGAAACTATGGAGACTATGGTGACTA
CGGAAACTACGGCAACTACGAGAACTACCCTTCCGCTACCTCTTCTGCTACCTCTTCTGCCACCCCTTCAAGCTCCTCGT
CAAGCTCTTCATCAGCTACACCAACACCAACTCCAACCGGTTACGGTTCATACGCAAGCTACGGAGCATACAAGAACAAG
AAAGCACGTGCTCCAGAAGCCAACCCAGAAGACTATGGAAACTACGGTGACTACGGTGATTACGGCAACTACGAGAACTA
CACCCCTGCCTCCACCTCCTCCACATCCACATCCACCGTCACCCCAACTCCCACTCCCTACCCAACGGCTACACAACTAC
GGCTCCTAAGTACTACGACACTACAGCGCGAAGCGATACGTTCAGCTTAGCGATGACTATTGACAGTTTATTGTATACAC
ACTCTTAGACGATGAATCGCCTTTAATATATGATATAATGATCAATGTTGACTGCCTCGCACTGATTGGAGAATGGATAA
AA
Frame 3
TAGGACTTGACAGTTTCGGTCCGGAATTCCCGGGATCTACTACAAACAAATTACATTCAATATGCTTTTCTCAAGAGACT
TTATTATGCTTTTGGCATTCGCCGCCGTTGGCGCTGTCGCTGCACCTGTTGCAGAGCCTCAACCAGAAGCCAAAGCTGAT
GCTTCTCCAGCGGACTATGGCGACTACGGTGACTATGGCGATTACGGAAACTATGGAACATACGAGAAATACCCATCCTC
TTCCTCCGTCACACCTACACCAACACCAACACCAACTGGATATGGTAGCTATGCCAGCTACGGTACCTACAAGAACAAGA
AAGTTAGAGAAGCCGCCCCAGAGCCTGCTCCAGTAGCAAAGCGTGATGATTATGGAAACTATGGAGACTATGGTGACTAC
GGAAACTACGGCAACTACGAGAACTACCCTTCCGCTACCTCTTCTGCTACCTCTTCTGCCACCCCTTCAAGCTCCTCGTC
AAGCTCTTCATCAGCTACACCAACACCAACTCCAACCGGTTACGGTTCATACGCAAGCTACGGAGCATACAAGAACAAGA
AAGCACGTGCTCCAGAAGCCAACCCAGAAGACTATGGAAACTACGGTGACTACGGTGATTACGGCAACTACGAGAACTAC
ACCCCTGCCTCCACCTCCTCCACATCCACATCCACCGTCACCCCAACTCCCACTCCCTACCCAACGGCTACACAACTACG
GCTCCTAAGTACTACGACACTACAGCGCGAAGCGATACGTTCAGCTTAGCGATGACTATTGACAGTTTATTGTATACACA
CTCTTAGACGATGAATCGCCTTTAATATATGATATAATGATCAATGTTGACTGCCTCGCACTGATTGGAGAATGGATAAA
AG
Frame 4
CTTTTATCCATTCTCCAATCAGTGCGAGGCAGTCAACATTGATCATTATATCATATATTAAAGGCGATTCATCGTCTAAG
AGTGTGTATACAATAAACTGTCAATAGTCATCGCTAAGCTGAACGTATCGCTTCGCGCTGTAGTGTCGTAGTACTTAGGA
GCCGTAGTTGTGTAGCCGTTGGGTAGGGAGTGGGAGTTGGGGTGACGGTGGATGTGGATGTGGAGGAGGTGGAGGCAGGG
GTGTAGTTCTCGTAGTTGCCGTAATCACCGTAGTCACCGTAGTTTCCATAGTCTTCTGGGTTGGCTTCTGGAGCACGTGC
TTTCTTGTTCTTGTATGCTCCGTAGCTTGCGTATGAACCGTAACCGGTTGGAGTTGGTGTTGGTGTAGCTGATGAAGAGC
TTGACGAGGAGCTTGAAGGGGTGGCAGAAGAGGTAGCAGAAGAGGTAGCGGAAGGGTAGTTCTCGTAGTTGCCGTAGTTT
CCGTAGTCACCATAGTCTCCATAGTTTCCATAATCATCACGCTTTGCTACTGGAGCAGGCTCTGGGGCGGCTTCTCTAAC
TTTCTTGTTCTTGTAGGTACCGTAGCTGGCATAGCTACCATATCCAGTTGGTGTTGGTGTTGGTGTAGGTGTGACGGAGG
AAGAGGATGGGTATTTCTCGTATGTTCCATAGTTTCCGTAATCGCCATAGTCACCGTAGTCGCCATAGTCCGCTGGAGAA
GCATCAGCTTTGGCTTCTGGTTGAGGCTCTGCAACAGGTGCAGCGACAGCGCCAACGGCGGCGAATGCCAAAAGCATAAT
AAAGTCTCTTGAGAAAAGCATATTGAATGTAATTTGTTTGTAGTAGATCCCGGGAATTCCGGACCGAAACTGTCAAGTCC
TA
Frame 5
TTTTATCCATTCTCCAATCAGTGCGAGGCAGTCAACATTGATCATTATATCATATATTAAAGGCGATTCATCGTCTAAGA
GTGTGTATACAATAAACTGTCAATAGTCATCGCTAAGCTGAACGTATCGCTTCGCGCTGTAGTGTCGTAGTACTTAGGAG
CCGTAGTTGTGTAGCCGTTGGGTAGGGAGTGGGAGTTGGGGTGACGGTGGATGTGGATGTGGAGGAGGTGGAGGCAGGGG

6
TGTAGTTCTCGTAGTTGCCGTAATCACCGTAGTCACCGTAGTTTCCATAGTCTTCTGGGTTGGCTTCTGGAGCACGTGCT
TTCTTGTTCTTGTATGCTCCGTAGCTTGCGTATGAACCGTAACCGGTTGGAGTTGGTGTTGGTGTAGCTGATGAAGAGCT
TGACGAGGAGCTTGAAGGGGTGGCAGAAGAGGTAGCAGAAGAGGTAGCGGAAGGGTAGTTCTCGTAGTTGCCGTAGTTTC
CGTAGTCACCATAGTCTCCATAGTTTCCATAATCATCACGCTTTGCTACTGGAGCAGGCTCTGGGGCGGCTTCTCTAACT
TTCTTGTTCTTGTAGGTACCGTAGCTGGCATAGCTACCATATCCAGTTGGTGTTGGTGTTGGTGTAGGTGTGACGGAGGA
AGAGGATGGGTATTTCTCGTATGTTCCATAGTTTCCGTAATCGCCATAGTCACCGTAGTCGCCATAGTCCGCTGGAGAAG
CATCAGCTTTGGCTTCTGGTTGAGGCTCTGCAACAGGTGCAGCGACAGCGCCAACGGCGGCGAATGCCAAAAGCATAATA
AAGTCTCTTGAGAAAAGCATATTGAATGTAATTTGTTTGTAGTAGATCCCGGGAATTCCGGACCGAAACTGTCAAGTCCT
AA
Frame 6
TTTATCCATTCTCCAATCAGTGCGAGGCAGTCAACATTGATCATTATATCATATATTAAAGGCGATTCATCGTCTAAGAG
TGTGTATACAATAAACTGTCAATAGTCATCGCTAAGCTGAACGTATCGCTTCGCGCTGTAGTGTCGTAGTACTTAGGAGC
CGTAGTTGTGTAGCCGTTGGGTAGGGAGTGGGAGTTGGGGTGACGGTGGATGTGGATGTGGAGGAGGTGGAGGCAGGGGT
GTAGTTCTCGTAGTTGCCGTAATCACCGTAGTCACCGTAGTTTCCATAGTCTTCTGGGTTGGCTTCTGGAGCACGTGCTT
TCTTGTTCTTGTATGCTCCGTAGCTTGCGTATGAACCGTAACCGGTTGGAGTTGGTGTTGGTGTAGCTGATGAAGAGCTT
GACGAGGAGCTTGAAGGGGTGGCAGAAGAGGTAGCAGAAGAGGTAGCGGAAGGGTAGTTCTCGTAGTTGCCGTAGTTTCC
GTAGTCACCATAGTCTCCATAGTTTCCATAATCATCACGCTTTGCTACTGGAGCAGGCTCTGGGGCGGCTTCTCTAACTT
TCTTGTTCTTGTAGGTACCGTAGCTGGCATAGCTACCATATCCAGTTGGTGTTGGTGTTGGTGTAGGTGTGACGGAGGAA
GAGGATGGGTATTTCTCGTATGTTCCATAGTTTCCGTAATCGCCATAGTCACCGTAGTCGCCATAGTCCGCTGGAGAAGC
ATCAGCTTTGGCTTCTGGTTGAGGCTCTGCAACAGGTGCAGCGACAGCGCCAACGGCGGCGAATGCCAAAAGCATAATAA
AGTCTCTTGAGAAAAGCATATTGAATGTAATTTGTTTGTAGTAGATCCCGGGAATTCCGGACCGAAACTGTCAAGTCCTA
AA

# Report the translations, the ORFs of every frame, the longest ORF and the
# AT : GC ratio for the input sequence.
Res = Translation(seq)
print("Frames of Translation")
# Res[0] holds the one-letter strings, Res[1] the three-letter strings;
# print them pairwise per frame, separated by a blank line.
for one_letter, three_letter in zip(Res[0], Res[1]):
    print(one_letter)
    print(three_letter)
    print()

print("ORFS in Corresponding Frames")
orfs_by_frame = ORF(seq)  # computed once instead of once per frame
for number in range(1, 7):
    label = "Frame " + str(number)
    print(label)
    print(orfs_by_frame[label])
print()

print("Longest ORF in sequence")
print(LongORF(seq))
print("AT : GC Ratio")
print(Ratio(seq))

Frames of Translation
F R T * Q F R S G I P G I Y Y K Q I T F N M L F S R D F I M L L A F A A V G A V
A A P V A E P Q P E A K A D A S P A D Y G D Y G D Y G D Y G N Y G T Y E K Y P S
S S S V T P T P T P T P T G Y G S Y A S Y G T Y K N K K V R E A A P E P A P V A
K R D D Y G N Y G D Y G D Y G N Y G N Y E N Y P S A T S S A T S S A T P S S S S
S S S S S A T P T P T P T G Y G S Y A S Y G A Y K N K K A R A P E A N P E D Y G
N Y G D Y G D Y G N Y E N Y T P A S T S S T S T S T V T P T P T P Y P T A T Q L
R L L S T T T L Q R E A I R S A * R * L L T V Y C I H T L R R * I A F N I * Y N
D Q C * L P R T D W R M D K
Phe Arg Thr * Gln Phe Arg Ser Gly Ile Pro Gly Ile Tyr Tyr Lys Gln Ile Thr Phe
Asn Met Lue Phe Ser Arg Asp Phe Ile Met Lue Lue Ala Phe Ala Ala Val Gly Ala Val
Ala Ala Pro Val Ala Glu Pro Gln Pro Glu Ala Lys Ala Asp Ala Ser Pro Ala Asp Tyr
Gly Asp Tyr Gly Asp Tyr Gly Asp Tyr Gly Asn Tyr Gly Thr Tyr Glu Lys Tyr Pro Ser
Ser Ser Ser Val Thr Pro Thr Pro Thr Pro Thr Pro Thr Gly Tyr Gly Ser Tyr Ala Ser
Tyr Gly Thr Tyr Lys Asn Lys Lys Val Arg Glu Ala Ala Pro Glu Pro Ala Pro Val Ala
Lys Arg Asp Asp Tyr Gly Asn Tyr Gly Asp Tyr Gly Asp Tyr Gly Asn Tyr Gly Asn Tyr
Glu Asn Tyr Pro Ser Ala Thr Ser Ser Ala Thr Ser Ser Ala Thr Pro Ser Ser Ser Ser
Ser Ser Ser Ser Ser Ala Thr Pro Thr Pro Thr Pro Thr Gly Tyr Gly Ser Tyr Ala Ser
Tyr Gly Ala Tyr Lys Asn Lys Lys Ala Arg Ala Pro Glu Ala Asn Pro Glu Asp Tyr Gly
Asn Tyr Gly Asp Tyr Gly Asp Tyr Gly Asn Tyr Glu Asn Tyr Thr Pro Ala Ser Thr Ser
Ser Thr Ser Thr Ser Thr Val Thr Pro Thr Pro Thr Pro Tyr Pro Thr Ala Thr Gln Lue
Arg Lue Lue Ser Thr Thr Thr Lue Gln Arg Glu Ala Ile Arg Ser Ala * Arg * Lue Lue
Thr Val Tyr Cys Ile His Thr Lue Arg Arg * Ile Ala Phe Asn Ile * Tyr Asn Asp Gln
Cys * Lue Pro Arg Thr Asp Trp Arg Met Asp Lys

L G L D S F G P E F P G S T T N K L H S I C F S Q E T L L C F W H S P P L A L S
L H L L Q S L N Q K P K L M L L Q R T M A T T V T M A I T E T M E H T R N T H P
L P P S H L H Q H Q H Q L D M V A M P A T V P T R T R K L E K P P Q S L L Q * Q
S V M I M E T M E T M V T T E T T A T T R T T L P L P L L L P L L P P L Q A P R
Q A L H Q L H Q H Q L Q P V T V H T Q A T E H T R T R K H V L Q K P T Q K T M E
T T V T T V I T A T T R T T P L P P P P P H P H P P S P Q L P L P T Q R L H N Y
G S * V L R H Y S A K R Y V Q L S D D Y * Q F I V Y T L L D D E S P L I Y D I M

8
I N V D C L A L I G E W I K
Lue Gly Lue Asp Ser Phe Gly Pro Glu Phe Pro Gly Ser Thr Thr Asn Lys Lue His Ser
Ile Cys Phe Ser Gln Glu Thr Lue Lue Cys Phe Trp His Ser Pro Pro Lue Ala Lue Ser
Lue His Lue Lue Gln Ser Lue Asn Gln Lys Pro Lys Lue Met Lue Lue Gln Arg Thr Met
Ala Thr Thr Val Thr Met Ala Ile Thr Glu Thr Met Glu His Thr Arg Asn Thr His Pro
Lue Pro Pro Ser His Lue His Gln His Gln His Gln Lue Asp Met Val Ala Met Pro Ala
Thr Val Pro Thr Arg Thr Arg Lys Lue Glu Lys Pro Pro Gln Ser Lue Lue Gln * Gln
Ser Val Met Ile Met Glu Thr Met Glu Thr Met Val Thr Thr Glu Thr Thr Ala Thr Thr
Arg Thr Thr Lue Pro Lue Pro Lue Lue Lue Pro Lue Lue Pro Pro Lue Gln Ala Pro Arg
Gln Ala Lue His Gln Lue His Gln His Gln Lue Gln Pro Val Thr Val His Thr Gln Ala
Thr Glu His Thr Arg Thr Arg Lys His Val Lue Gln Lys Pro Thr Gln Lys Thr Met Glu
Thr Thr Val Thr Thr Val Ile Thr Ala Thr Thr Arg Thr Thr Pro Lue Pro Pro Pro Pro
Pro His Pro His Pro Pro Ser Pro Gln Lue Pro Lue Pro Thr Gln Arg Lue His Asn Tyr
Gly Ser * Val Lue Arg His Tyr Ser Ala Lys Arg Tyr Val Gln Lue Ser Asp Asp Tyr *
Gln Phe Ile Val Tyr Thr Lue Lue Asp Asp Glu Ser Pro Lue Ile Tyr Asp Ile Met Ile
Asn Val Asp Cys Lue Ala Lue Ile Gly Glu Trp Ile Lys

* D L T V S V R N S R D L L Q T N Y I Q Y A F L K R L Y Y A F G I R R R W R C R
C T C C R A S T R S Q S * C F S S G L W R L R * L W R L R K L W N I R E I P I L
F L R H T Y T N T N T N W I W * L C Q L R Y L Q E Q E S * R S R P R A C S S S K
A * * L W K L W R L W * L R K L R Q L R E L P F R Y L F C Y L F C H P F K L L V
K L F I S Y T N T N S N R L R F I R K L R S I Q E Q E S T C S R S Q P R R L W K
L R * L R * L R Q L R E L H P C L H L L H I H I H R H P N S H S L P N G Y T T T
A P K Y Y D T T A R S D T F S L A M T I D S L L Y T H S * T M N R L * Y M I * *
S M L T A S H * L E N G * K
* Asp Lue Thr Val Ser Val Arg Asn Ser Arg Asp Lue Lue Gln Thr Asn Tyr Ile Gln
Tyr Ala Phe Lue Lys Arg Lue Tyr Tyr Ala Phe Gly Ile Arg Arg Arg Trp Arg Cys Arg
Cys Thr Cys Cys Arg Ala Ser Thr Arg Ser Gln Ser * Cys Phe Ser Ser Gly Lue Trp
Arg Lue Arg * Lue Trp Arg Lue Arg Lys Lue Trp Asn Ile Arg Glu Ile Pro Ile Lue
Phe Lue Arg His Thr Tyr Thr Asn Thr Asn Thr Asn Trp Ile Trp * Lue Cys Gln Lue
Arg Tyr Lue Gln Glu Gln Glu Ser * Arg Ser Arg Pro Arg Ala Cys Ser Ser Ser Lys
Ala * * Lue Trp Lys Lue Trp Arg Lue Trp * Lue Arg Lys Lue Arg Gln Lue Arg Glu
Lue Pro Phe Arg Tyr Lue Phe Cys Tyr Lue Phe Cys His Pro Phe Lys Lue Lue Val Lys
Lue Phe Ile Ser Tyr Thr Asn Thr Asn Ser Asn Arg Lue Arg Phe Ile Arg Lys Lue Arg
Ser Ile Gln Glu Gln Glu Ser Thr Cys Ser Arg Ser Gln Pro Arg Arg Lue Trp Lys Lue
Arg * Lue Arg * Lue Arg Gln Lue Arg Glu Lue His Pro Cys Lue His Lue Lue His Ile
His Ile His Arg His Pro Asn Ser His Ser Lue Pro Asn Gly Tyr Thr Thr Thr Ala Pro
Lys Tyr Tyr Asp Thr Thr Ala Arg Ser Asp Thr Phe Ser Lue Ala Met Thr Ile Asp Ser
Lue Lue Tyr Thr His Ser * Thr Met Asn Arg Lue * Tyr Met Ile * * Ser Met Lue Thr
Ala Ser His * Lue Glu Asn Gly * Lys

L L S I L Q S V R G S Q H * S L Y H I L K A I H R L R V C I Q * T V N S H R * A
E R I A S R C S V V V L R S R S C V A V G * G V G V G V T V D V D V E E V E A G
V * F S * L P * S P * S P * F P * S S G L A S G A R A F L F L Y A P * L A Y E P
* P V G V G V G V A D E E L D E E L E G V A E E V A E E V A E G * F S * L P * F
P * S P * S P * F P * S S R F A T G A G S G A A S L T F L F L * V P * L A * L P
Y P V G V G V G V G V T E E E D G Y F S Y V P * F P * S P * S P * S P * S A G E
A S A L A S G * G S A T G A A T A P T A A N A K S I I K S L E K S I L N V I C L

9
* * I P G I P D R N C Q V L
Lue Lue Ser Ile Lue Gln Ser Val Arg Gly Ser Gln His * Ser Lue Tyr His Ile Lue
Lys Ala Ile His Arg Lue Arg Val Cys Ile Gln * Thr Val Asn Ser His Arg * Ala Glu
Arg Ile Ala Ser Arg Cys Ser Val Val Val Lue Arg Ser Arg Ser Cys Val Ala Val Gly
* Gly Val Gly Val Gly Val Thr Val Asp Val Asp Val Glu Glu Val Glu Ala Gly Val *
Phe Ser * Lue Pro * Ser Pro * Ser Pro * Phe Pro * Ser Ser Gly Lue Ala Ser Gly
Ala Arg Ala Phe Lue Phe Lue Tyr Ala Pro * Lue Ala Tyr Glu Pro * Pro Val Gly Val
Gly Val Gly Val Ala Asp Glu Glu Lue Asp Glu Glu Lue Glu Gly Val Ala Glu Glu Val
Ala Glu Glu Val Ala Glu Gly * Phe Ser * Lue Pro * Phe Pro * Ser Pro * Ser Pro *
Phe Pro * Ser Ser Arg Phe Ala Thr Gly Ala Gly Ser Gly Ala Ala Ser Lue Thr Phe
Lue Phe Lue * Val Pro * Lue Ala * Lue Pro Tyr Pro Val Gly Val Gly Val Gly Val
Gly Val Thr Glu Glu Glu Asp Gly Tyr Phe Ser Tyr Val Pro * Phe Pro * Ser Pro *
Ser Pro * Ser Pro * Ser Ala Gly Glu Ala Ser Ala Lue Ala Ser Gly * Gly Ser Ala
Thr Gly Ala Ala Thr Ala Pro Thr Ala Ala Asn Ala Lys Ser Ile Ile Lys Ser Lue Glu
Lys Ser Ile Lue Asn Val Ile Cys Lue * * Ile Pro Gly Ile Pro Asp Arg Asn Cys Gln
Val Lue

F Y P F S N Q C E A V N I D H Y I I Y * R R F I V * E C V Y N K L S I V I A K L
N V S L R A V V S * Y L G A V V V * P L G R E W E L G * R W M W M W R R W R Q G
C S S R S C R N H R S H R S F H S L L G W L L E H V L S C S C M L R S L R M N R
N R L E L V L V * L M K S L T R S L K G W Q K R * Q K R * R K G S S R S C R S F
R S H H S L H S F H N H H A L L L E Q A L G R L L * L S C S C R Y R S W H S Y H
I Q L V L V L V * V * R R K R M G I S R M F H S F R N R H S H R S R H S P L E K
H Q L W L L V E A L Q Q V Q R Q R Q R R R M P K A * * S L L R K A Y * M * F V C
S R S R E F R T E T V K S *
Phe Tyr Pro Phe Ser Asn Gln Cys Glu Ala Val Asn Ile Asp His Tyr Ile Ile Tyr *
Arg Arg Phe Ile Val * Glu Cys Val Tyr Asn Lys Lue Ser Ile Val Ile Ala Lys Lue
Asn Val Ser Lue Arg Ala Val Val Ser * Tyr Lue Gly Ala Val Val Val * Pro Lue Gly
Arg Glu Trp Glu Lue Gly * Arg Trp Met Trp Met Trp Arg Arg Trp Arg Gln Gly Cys
Ser Ser Arg Ser Cys Arg Asn His Arg Ser His Arg Ser Phe His Ser Lue Lue Gly Trp
Lue Lue Glu His Val Lue Ser Cys Ser Cys Met Lue Arg Ser Lue Arg Met Asn Arg Asn
Arg Lue Glu Lue Val Lue Val * Lue Met Lys Ser Lue Thr Arg Ser Lue Lys Gly Trp
Gln Lys Arg * Gln Lys Arg * Arg Lys Gly Ser Ser Arg Ser Cys Arg Ser Phe Arg Ser
His His Ser Lue His Ser Phe His Asn His His Ala Lue Lue Lue Glu Gln Ala Lue Gly
Arg Lue Lue * Lue Ser Cys Ser Cys Arg Tyr Arg Ser Trp His Ser Tyr His Ile Gln
Lue Val Lue Val Lue Val * Val * Arg Arg Lys Arg Met Gly Ile Ser Arg Met Phe His
Ser Phe Arg Asn Arg His Ser His Arg Ser Arg His Ser Pro Lue Glu Lys His Gln Lue
Trp Lue Lue Val Glu Ala Lue Gln Gln Val Gln Arg Gln Arg Gln Arg Arg Arg Met Pro
Lys Ala * * Ser Lue Lue Arg Lys Ala Tyr * Met * Phe Val Cys Ser Arg Ser Arg Glu
Phe Arg Thr Glu Thr Val Lys Ser *

F I H S P I S A R Q S T L I I I S Y I K G D S S S K S V Y T I N C Q * S S L S *
T Y R F A L * C R S T * E P * L C S R W V G S G S W G D G G C G C G G G G G R G
V V L V V A V I T V V T V V S I V F W V G F W S T C F L V L V C S V A C V * T V
T G W S W C W C S * * R A * R G A * R G G R R G S R R G S G R V V L V V A V V S
V V T I V S I V S I I I T L C Y W S R L W G G F S N F L V L V G T V A G I A T I
S S W C W C W C R C D G G R G W V F L V C S I V S V I A I V T V V A I V R W R S
I S F G F W L R L C N R C S D S A N G G E C Q K H N K V S * E K H I E C N L F V

10
V D P G N S G P K L S S P K
Phe Ile His Ser Pro Ile Ser Ala Arg Gln Ser Thr Lue Ile Ile Ile Ser Tyr Ile Lys
Gly Asp Ser Ser Ser Lys Ser Val Tyr Thr Ile Asn Cys Gln * Ser Ser Lue Ser * Thr
Tyr Arg Phe Ala Lue * Cys Arg Ser Thr * Glu Pro * Lue Cys Ser Arg Trp Val Gly
Ser Gly Ser Trp Gly Asp Gly Gly Cys Gly Cys Gly Gly Gly Gly Gly Arg Gly Val Val
Lue Val Val Ala Val Ile Thr Val Val Thr Val Val Ser Ile Val Phe Trp Val Gly Phe
Trp Ser Thr Cys Phe Lue Val Lue Val Cys Ser Val Ala Cys Val * Thr Val Thr Gly
Trp Ser Trp Cys Trp Cys Ser * * Arg Ala * Arg Gly Ala * Arg Gly Gly Arg Arg Gly
Ser Arg Arg Gly Ser Gly Arg Val Val Lue Val Val Ala Val Val Ser Val Val Thr Ile
Val Ser Ile Val Ser Ile Ile Ile Thr Lue Cys Tyr Trp Ser Arg Lue Trp Gly Gly Phe
Ser Asn Phe Lue Val Lue Val Gly Thr Val Ala Gly Ile Ala Thr Ile Ser Ser Trp Cys
Trp Cys Trp Cys Arg Cys Asp Gly Gly Arg Gly Trp Val Phe Lue Val Cys Ser Ile Val
Ser Val Ile Ala Ile Val Thr Val Val Ala Ile Val Arg Trp Arg Ser Ile Ser Phe Gly
Phe Trp Lue Arg Lue Cys Asn Arg Cys Ser Asp Ser Ala Asn Gly Gly Glu Cys Gln Lys
His Asn Lys Val Ser * Glu Lys His Ile Glu Cys Asn Lue Phe Val Val Asp Pro Gly
Asn Ser Gly Pro Lys Lue Ser Ser Pro Lys

ORFS in Corresponding Frames


Frame 1
['ATGCTTTTCTCAAGAGACTTTATTATGCTTTTGGCATTCGCCGCCGTTGGCGCTGTCGCTGCACCTGTTGCAGAGCCT
CAACCAGAAGCCAAAGCTGATGCTTCTCCAGCGGACTATGGCGACTACGGTGACTATGGCGATTACGGAAACTATGGAAC
ATACGAGAAATACCCATCCTCTTCCTCCGTCACACCTACACCAACACCAACACCAACTGGATATGGTAGCTATGCCAGCT
ACGGTACCTACAAGAACAAGAAAGTTAGAGAAGCCGCCCCAGAGCCTGCTCCAGTAGCAAAGCGTGATGATTATGGAAAC
TATGGAGACTATGGTGACTACGGAAACTACGGCAACTACGAGAACTACCCTTCCGCTACCTCTTCTGCTACCTCTTCTGC
CACCCCTTCAAGCTCCTCGTCAAGCTCTTCATCAGCTACACCAACACCAACTCCAACCGGTTACGGTTCATACGCAAGCT
ACGGAGCATACAAGAACAAGAAAGCACGTGCTCCAGAAGCCAACCCAGAAGACTATGGAAACTACGGTGACTACGGTGAT
TACGGCAACTACGAGAACTACACCCCTGCCTCCACCTCCTCCACATCCACATCCACCGTCACCCCAACTCCCACTCCCTA
CCCAACGGCTACACAACTACGGCTCCTAAGTACTACGACACTACAGCGCGAAGCGATACGTTCAGCTTAG', 'ATGCTT
TTGGCATTCGCCGCCGTTGGCGCTGTCGCTGCACCTGTTGCAGAGCCTCAACCAGAAGCCAAAGCTGATGCTTCTCCAGC
GGACTATGGCGACTACGGTGACTATGGCGATTACGGAAACTATGGAACATACGAGAAATACCCATCCTCTTCCTCCGTCA
CACCTACACCAACACCAACACCAACTGGATATGGTAGCTATGCCAGCTACGGTACCTACAAGAACAAGAAAGTTAGAGAA
GCCGCCCCAGAGCCTGCTCCAGTAGCAAAGCGTGATGATTATGGAAACTATGGAGACTATGGTGACTACGGAAACTACGG
CAACTACGAGAACTACCCTTCCGCTACCTCTTCTGCTACCTCTTCTGCCACCCCTTCAAGCTCCTCGTCAAGCTCTTCAT
CAGCTACACCAACACCAACTCCAACCGGTTACGGTTCATACGCAAGCTACGGAGCATACAAGAACAAGAAAGCACGTGCT
CCAGAAGCCAACCCAGAAGACTATGGAAACTACGGTGACTACGGTGATTACGGCAACTACGAGAACTACACCCCTGCCTC
CACCTCCTCCACATCCACATCCACCGTCACCCCAACTCCCACTCCCTACCCAACGGCTACACAACTACGGCTCCTAAGTA
CTACGACACTACAGCGCGAAGCGATACGTTCAGCTTAG', 'ATGGATAAA']
Frame 2
['ATGCTTCTCCAGCGGACTATGGCGACTACGGTGACTATGGCGATTACGGAAACTATGGAACATACGAGAAATACCCAT
CCTCTTCCTCCGTCACACCTACACCAACACCAACACCAACTGGATATGGTAGCTATGCCAGCTACGGTACCTACAAGAAC
AAGAAAGTTAGAGAAGCCGCCCCAGAGCCTGCTCCAGTAG', 'ATGGCGACTACGGTGACTATGGCGATTACGGAAACT
ATGGAACATACGAGAAATACCCATCCTCTTCCTCCGTCACACCTACACCAACACCAACACCAACTGGATATGGTAGCTAT
GCCAGCTACGGTACCTACAAGAACAAGAAAGTTAGAGAAGCCGCCCCAGAGCCTGCTCCAGTAG', 'ATGGCGATTACG
GAAACTATGGAACATACGAGAAATACCCATCCTCTTCCTCCGTCACACCTACACCAACACCAACACCAACTGGATATGGT
AGCTATGCCAGCTACGGTACCTACAAGAACAAGAAAGTTAGAGAAGCCGCCCCAGAGCCTGCTCCAGTAG', 'ATGGAA
CATACGAGAAATACCCATCCTCTTCCTCCGTCACACCTACACCAACACCAACACCAACTGGATATGGTAGCTATGCCAGC
TACGGTACCTACAAGAACAAGAAAGTTAGAGAAGCCGCCCCAGAGCCTGCTCCAGTAG',
'ATGGTAGCTATGCCAGCTACGGTACCTACAAGAACAAGAAAGTTAGAGAAGCCGCCCCAGAGCCTGCTCCAGTAG',

11
'ATGCCAGCTACGGTACCTACAAGAACAAGAAAGTTAGAGAAGCCGCCCCAGAGCCTGCTCCAGTAG', 'ATGATTATG
GAAACTATGGAGACTATGGTGACTACGGAAACTACGGCAACTACGAGAACTACCCTTCCGCTACCTCTTCTGCTACCTCT
TCTGCCACCCCTTCAAGCTCCTCGTCAAGCTCTTCATCAGCTACACCAACACCAACTCCAACCGGTTACGGTTCATACGC
AAGCTACGGAGCATACAAGAACAAGAAAGCACGTGCTCCAGAAGCCAACCCAGAAGACTATGGAAACTACGGTGACTACG
GTGATTACGGCAACTACGAGAACTACACCCCTGCCTCCACCTCCTCCACATCCACATCCACCGTCACCCCAACTCCCACT
CCCTACCCAACGGCTACACAACTACGGCTCCTAA', 'ATGGAAACTATGGAGACTATGGTGACTACGGAAACTACGGCA
ACTACGAGAACTACCCTTCCGCTACCTCTTCTGCTACCTCTTCTGCCACCCCTTCAAGCTCCTCGTCAAGCTCTTCATCA
GCTACACCAACACCAACTCCAACCGGTTACGGTTCATACGCAAGCTACGGAGCATACAAGAACAAGAAAGCACGTGCTCC
AGAAGCCAACCCAGAAGACTATGGAAACTACGGTGACTACGGTGATTACGGCAACTACGAGAACTACACCCCTGCCTCCA
CCTCCTCCACATCCACATCCACCGTCACCCCAACTCCCACTCCCTACCCAACGGCTACACAACTACGGCTCCTAA', 'A
TGGAGACTATGGTGACTACGGAAACTACGGCAACTACGAGAACTACCCTTCCGCTACCTCTTCTGCTACCTCTTCTGCCA
CCCCTTCAAGCTCCTCGTCAAGCTCTTCATCAGCTACACCAACACCAACTCCAACCGGTTACGGTTCATACGCAAGCTAC
GGAGCATACAAGAACAAGAAAGCACGTGCTCCAGAAGCCAACCCAGAAGACTATGGAAACTACGGTGACTACGGTGATTA
CGGCAACTACGAGAACTACACCCCTGCCTCCACCTCCTCCACATCCACATCCACCGTCACCCCAACTCCCACTCCCTACC
CAACGGCTACACAACTACGGCTCCTAA', 'ATGGTGACTACGGAAACTACGGCAACTACGAGAACTACCCTTCCGCTAC
CTCTTCTGCTACCTCTTCTGCCACCCCTTCAAGCTCCTCGTCAAGCTCTTCATCAGCTACACCAACACCAACTCCAACCG
GTTACGGTTCATACGCAAGCTACGGAGCATACAAGAACAAGAAAGCACGTGCTCCAGAAGCCAACCCAGAAGACTATGGA
AACTACGGTGACTACGGTGATTACGGCAACTACGAGAACTACACCCCTGCCTCCACCTCCTCCACATCCACATCCACCGT
CACCCCAACTCCCACTCCCTACCCAACGGCTACACAACTACGGCTCCTAA', 'ATGGAAACTACGGTGACTACGGTGAT
TACGGCAACTACGAGAACTACACCCCTGCCTCCACCTCCTCCACATCCACATCCACCGTCACCCCAACTCCCACTCCCTA
CCCAACGGCTACACAACTACGGCTCCTAA', 'ATGATCAATGTTGACTGCCTCGCACTGATTGGAGAATGGATAAAA']
Frame 3
['ATGACTATTGACAGTTTATTGTATACACACTCTTAG', 'ATGAATCGCCTTTAA', 'ATGATATAA',
'ATGTTGACTGCCTCGCACTGA', '']
Frame 4
['']
Frame 5
['ATGTGGATGTGGAGGAGGTGGAGGCAGGGGTGTAGTTCTCGTAGTTGCCGTAATCACCGTAGTCACCGTAGTTTCCAT
AGTCTTCTGGGTTGGCTTCTGGAGCACGTGCTTTCTTGTTCTTGTATGCTCCGTAGCTTGCGTATGAACCGTAACCGGTT
GGAGTTGGTGTTGGTGTAG', 'ATGTGGAGGAGGTGGAGGCAGGGGTGTAGTTCTCGTAGTTGCCGTAATCACCGTAGT
CACCGTAGTTTCCATAGTCTTCTGGGTTGGCTTCTGGAGCACGTGCTTTCTTGTTCTTGTATGCTCCGTAGCTTGCGTAT
GAACCGTAACCGGTTGGAGTTGGTGTTGGTGTAG',
'ATGCTCCGTAGCTTGCGTATGAACCGTAACCGGTTGGAGTTGGTGTTGGTGTAG',
'ATGAACCGTAACCGGTTGGAGTTGGTGTTGGTGTAG',
'ATGAAGAGCTTGACGAGGAGCTTGAAGGGGTGGCAGAAGAGGTAG', 'ATGGGTATTTCTCGTATGTTCCATAGTTTC
CGTAATCGCCATAGTCACCGTAGTCGCCATAGTCCGCTGGAGAAGCATCAGCTTTGGCTTCTGGTTGAGGCTCTGCAACA
GGTGCAGCGACAGCGCCAACGGCGGCGAATGCCAAAAGCATAA', 'ATGTTCCATAGTTTCCGTAATCGCCATAGTCAC
CGTAGTCGCCATAGTCCGCTGGAGAAGCATCAGCTTTGGCTTCTGGTTGAGGCTCTGCAACAGGTGCAGCGACAGCGCCA
ACGGCGGCGAATGCCAAAAGCATAA', 'ATGCCAAAAGCATAA', 'ATGTAA', '']
Frame 6
['']

Longest ORF in sequence


('Number 1 Translational ORF in Frame 1', 'ATGCTTTTCTCAAGAGACTTTATTATGCTTTTGGCAT
TCGCCGCCGTTGGCGCTGTCGCTGCACCTGTTGCAGAGCCTCAACCAGAAGCCAAAGCTGATGCTTCTCCAGCGGACTAT
GGCGACTACGGTGACTATGGCGATTACGGAAACTATGGAACATACGAGAAATACCCATCCTCTTCCTCCGTCACACCTAC
ACCAACACCAACACCAACTGGATATGGTAGCTATGCCAGCTACGGTACCTACAAGAACAAGAAAGTTAGAGAAGCCGCCC
CAGAGCCTGCTCCAGTAGCAAAGCGTGATGATTATGGAAACTATGGAGACTATGGTGACTACGGAAACTACGGCAACTAC

12
GAGAACTACCCTTCCGCTACCTCTTCTGCTACCTCTTCTGCCACCCCTTCAAGCTCCTCGTCAAGCTCTTCATCAGCTAC
ACCAACACCAACTCCAACCGGTTACGGTTCATACGCAAGCTACGGAGCATACAAGAACAAGAAAGCACGTGCTCCAGAAG
CCAACCCAGAAGACTATGGAAACTACGGTGACTACGGTGATTACGGCAACTACGAGAACTACACCCCTGCCTCCACCTCC
TCCACATCCACATCCACCGTCACCCCAACTCCCACTCCCTACCCAACGGCTACACAACTACGGCTCCTAAGTACTACGAC
ACTACAGCGCGAAGCGATACGTTCAGCTTAG')
AT : GC Ratio
910 : 858

[ ]:

13

You might also like