26
26
* @file compo_mode_condition.c
28
* Authors: Alejandro Schaffer, Yi-Kuo Yu
30
27
* Functions to test whether conditional score matrix adjustment
31
28
* should be applied for a pair of matching sequences.
30
* Authors: Alejandro Schaffer, Yi-Kuo Yu
34
33
#ifndef SKIP_DOXYGEN_PROCESSING
35
34
static char const rcsid[] =
36
"$Id: compo_mode_condition.c,v 1.5 2005/12/01 13:49:43 gertz Exp $";
35
"$Id: compo_mode_condition.c,v 1.10 2006/05/03 14:09:52 gertz Exp $";
37
36
#endif /* SKIP_DOXYGEN_PROCESSING */
39
38
#include <algo/blast/core/ncbi_std.h>
41
40
#include <algo/blast/composition_adjustment/compo_mode_condition.h>
42
41
#include <algo/blast/composition_adjustment/matrix_frequency_data.h>
44
/* 180 degrees in half a circle */
43
/** 180 degrees in half a circle */
45
44
#define HALF_CIRCLE_DEGREES 180
46
/* some digits of PI */
45
/** some digits of PI */
47
46
#define PI 3.14159265358979323846 /* pi to 20 decimals; the previous literal (3.1415926543) was wrong from the 10th decimal on */
48
/* thresholds used to determine which composition mode to use */
47
/** @{ thresholds used to determine which composition mode to use */
49
48
#define QUERY_MATCH_DISTANCE_THRESHOLD 0.16
50
49
#define LENGTH_RATIO_THRESHOLD 3.0
51
50
#define ANGLE_DEGREE_THRESHOLD 70.0
54
/* type of function used to choose a mode for composition-based
51
#define HIGH_PAIR_THRESHOLD 0.4 /* s_HighPairFrequencies compares the sum of the two highest letter probabilities against this value (strictly greater) */
52
#define LENGTH_LOWER_THRESHOLD 50 /* s_HighPairFrequencies special-cases sequences with length <= this value; per its doc comment the test only succeeds for longer sequences */
55
/** type of function used to choose a mode for composition-based
55
56
* statistics. The variables are Queryseq_length, Matchseq_length,
56
57
* query_amino_count, match_amino_count and matrix_name.*/
57
typedef ECompoAdjustModes
58
typedef EMatrixAdjustRule
58
59
(*Condition) (int, int, const double *, const double *,
62
/* A function used to choose a mode for composition-based statistics.
63
/** Return true if length > 50 and the two most frequent letters
64
* occur a total of more than 40% of the time. */
66
s_HighPairFrequencies(const double * letterProbs, int length)
69
double max, second; /*two highest letter probabilities*/
71
if (length <= LENGTH_LOWER_THRESHOLD) {
76
for (i = 0; i < COMPO_NUM_TRUE_AA; i++) {
77
if (letterProbs[i] > second) {
78
second = letterProbs[i];
79
if (letterProbs[i] > max) {
85
return (max + second) > HIGH_PAIR_THRESHOLD;
89
* Return true if either the query or the matching sequence
90
* passes the test in s_HighPairFrequencies. */
92
s_HighPairEitherSeq(const double * P_query, int length1,
93
const double * P_match, int length2)
97
result1 = s_HighPairFrequencies(P_query, length1);
98
result2 = s_HighPairFrequencies(P_match, length2);
100
return result1 || result2;
104
/** Return eDontAdjustMatrix unconditionally */
105
static EMatrixAdjustRule
106
s_NeverAdjustMatrix(int Len_query, int Len_match,
107
const double * P_query, const double * P_match,
108
const char *matrix_name)
110
/* Suppress unused variable warnings */
117
return eDontAdjustMatrix;
121
/** Return eCompoScaleOldMatrix unconditionally */
122
static EMatrixAdjustRule
123
s_JustScaleOldMatrix(int Len_query, int Len_match,
124
const double * P_query, const double * P_match,
125
const char *matrix_name)
127
/* Suppress unused variable warnings */
134
return eCompoScaleOldMatrix;
138
/** A function used to choose a mode for composition-based statistics.
63
139
* If this function is used relative-entropy score adjustment is
64
140
* always applied, with a fixed value as the target relative entropy*/
65
static ECompoAdjustModes
66
TestToApplyREAdjustmentUnconditional(int Len_query,
68
const double * P_query,
69
const double * P_match,
70
const char *matrix_name)
141
static EMatrixAdjustRule
142
s_TestToApplyREAdjustmentUnconditional(int Len_query,
144
const double * P_query,
145
const double * P_match,
146
const char *matrix_name)
72
148
/* Suppress unused variable warnings */
142
218
len_large = len_m;
143
219
len_small = len_q;
145
if ((D_m_q > QUERY_MATCH_DISTANCE_THRESHOLD) &&
221
if (s_HighPairEitherSeq(P_query, Len_query, P_match, Len_match)) {
222
which_rule = eUserSpecifiedRelEntropy;
224
if ((D_m_q > QUERY_MATCH_DISTANCE_THRESHOLD) &&
146
225
(len_large / len_small > LENGTH_RATIO_THRESHOLD) &&
147
226
(angle > ANGLE_DEGREE_THRESHOLD)) {
148
mode_value = eCompoKeepOldMatrix;
150
mode_value = eUserSpecifiedRelEntropy;
227
which_rule = eCompoScaleOldMatrix;
229
which_rule = eUserSpecifiedRelEntropy;
157
237
* An array of functions that can be used to decide which optimization
158
238
* formulation should be used for score adjustment */
159
239
static Condition Cond_func[] = {
160
TestToApplyREAdjustmentConditional,
161
TestToApplyREAdjustmentUnconditional,
241
s_JustScaleOldMatrix,
242
s_TestToApplyREAdjustmentConditional,
243
s_TestToApplyREAdjustmentUnconditional,
167
* Choose how the relative entropy should be constrained based on
168
* properties of the two sequences to be aligned.
170
* @param length1 length of the first sequence
171
* @param length2 length of the second sequence
172
* @param probArray1 arrays of probabilities for the first sequence, in
173
* a 20 letter amino-acid alphabet
174
* @param probArray2 arrays of probabilities for the other sequence
175
* @param matrixName name of the scoring matrix
176
* @param testFunctionIndex allows different rules to be tested
177
* for the relative entropy decision.
180
Blast_ChooseCompoAdjustMode(int length1,
182
const double * probArray1,
183
const double * probArray2,
184
const char *matrixName,
185
int testFunctionIndex)
248
/* Documented in compo_mode_condition.h. */
250
Blast_ChooseMatrixAdjustRule(int length1,
252
const double * probArray1,
253
const double * probArray2,
254
const char *matrixName,
255
ECompoAdjustModes composition_adjust_mode)
257
int testFunctionIndex = (int) composition_adjust_mode;
188
260
Cond_func[testFunctionIndex] (length1, length2,
189
261
probArray1, probArray2, matrixName);