1
/*****************************************************************
2
* SQUID - a library of functions for biological sequence analysis
3
* Copyright (C) 1992-2002 Washington University School of Medicine
5
* This source code is freely distributed under the terms of the
6
* GNU General Public License. See the files COPYRIGHT and LICENSE
8
*****************************************************************/
12
/* Globally defines the IUPAC symbols for nucleic acid sequence.
 * Slowly evolving into a repository of globals. Tue Apr 20 1993
 *
 * RCS $Id: iupac.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: iupac.c,v 1.3 2001/02/21 21:09:10 eddy Exp)
 */
19
/* Default expected nucleotide occurrence frequencies, A/C/G/T.
 * Used (for instance) as the default distribution for
 * i.i.d. random nucleotide sequences.
 */
23
/* Uniform background distribution: each of the four nucleotides is
 * assigned the same probability, so the entries sum to exactly 1.
 */
float dnafq[4] = {
  0.25f,   /* A */
  0.25f,   /* C */
  0.25f,   /* G */
  0.25f    /* T */
};
25
/* Dayhoff f(i) amino acid occurrence frequencies.
 * From SwissProt 34: 21,210,388 residues
 * In alphabetic order by single-letter code.
 * Used (for instance) as the default distribution for
 * i.i.d. random protein sequences.
 */
54
/* Global amino acid alphabet, stored as a writable char array whose size
 * is taken from its initializer. The contents come from the AMINO_ALPHABET
 * macro, which is defined outside this file section.
 * NOTE(review): presumably the one-letter residue codes in alphabetical
 * order -- confirm against the header that defines AMINO_ALPHABET.
 */
char aa_alphabet[] = AMINO_ALPHABET;
55
/* aa_index converts to pam's 27x27 scheme */
56
/* Lookup table with one entry per amino acid (20 entries).
 *
 * Each value equals (letter - 'A') for the one-letter codes
 * A C D E F G H I K L M N P Q R S T V W Y taken in that order, i.e. the
 * residue's zero-based position in the full A..Z alphabet. The offsets
 * that never appear (1, 9, 14, 20, 23, 25) are those of B, J, O, U, X, Z.
 */
int aa_index[20] = {
   0,  2,  3,  4,  5,  6,  7,  8, 10, 11,   /* A C D E F G H I K L */
  12, 13, 15, 16, 17, 18, 19, 21, 22, 24    /* M N P Q R S T V W Y */
};
59
/* IUPAC code translations */
60
/* note: sequence chars are UPPER CASE */
61
struct iupactype iupac[] = {
62
{ 'A', 'T', NTA, NTT, },
63
{ 'C', 'G', NTC, NTG, },
64
{ 'G', 'C', NTG, NTC, },
65
{ 'T', 'A', NTT, NTA, },
66
{ 'U', 'A', NTU, NTA, },
67
{ 'N', 'N', NTN, NTN, },
68
{ ' ', ' ', NTGAP, NTGAP, },
69
{ 'R', 'Y', NTR, NTY, },
70
{ 'Y', 'R', NTY, NTR, },
71
{ 'M', 'K', NTM, NTK, },
72
{ 'K', 'M', NTK, NTM, },
73
{ 'S', 'S', NTS, NTS, },
74
{ 'W', 'W', NTW, NTW, },
75
{ 'H', 'D', NTH, NTD, },
76
{ 'B', 'V', NTB, NTV, },
77
{ 'V', 'B', NTV, NTB, },
78
{ 'D', 'H', NTD, NTH, },
82
char *stdcode1[65] = {
153
char *stdcode3[65] = {