23
23
* ===========================================================================*/
25
/** @file kappa_common.c
27
* @author Alejandro Schaffer, E. Michael Gertz
25
/** @file redo_alignment.c
29
26
* Routines for redoing a set of alignments, using either
30
27
* composition matrix adjustment or the Smith-Waterman algorithm (or
30
* @author Alejandro Schaffer, E. Michael Gertz
33
32
#ifndef SKIP_DOXYGEN_PROCESSING
34
33
static char const rcsid[] =
35
"$Id: redo_alignment.c,v 1.2 2005/12/01 15:41:42 gertz Exp $";
34
"$Id: redo_alignment.c,v 1.9 2006/05/03 14:10:39 gertz Exp $";
36
35
#endif /* SKIP_DOXYGEN_PROCESSING */
38
37
#include <stdlib.h>
46
45
#include <algo/blast/composition_adjustment/smith_waterman.h>
47
46
#include <algo/blast/composition_adjustment/compo_heap.h>
49
/* The natural log of 2, defined in newer systems as M_LN2 in math.h, but
50
missing in older systems. */
48
/** The natural log of 2, defined in newer systems as M_LN2 in math.h, but
49
missing in older systems. */
51
50
#define LOCAL_LN2 0.69314718055994530941723212145818
53
52
/** Define COMPO_INTENSE_DEBUG to be true to turn on rigorous but
101
* Create a new BlastCompo_Alignment; parameters to this function
102
* correspond directly to fields of BlastCompo_Alignment */
99
/* Documented in redo_alignment.h. */
103
100
BlastCompo_Alignment *
104
101
BlastCompo_AlignmentNew(int score,
105
ECompoAdjustModes comp_adjustment_mode,
106
int queryStart, int queryEnd, int queryIndex,
107
int matchStart, int matchEnd, int frame,
102
EMatrixAdjustRule matrix_adjust_rule,
103
int queryStart, int queryEnd, int queryIndex,
104
int matchStart, int matchEnd, int frame,
110
107
BlastCompo_Alignment * align = malloc(sizeof(BlastCompo_Alignment));
111
108
if (align != NULL) {
112
109
align->score = score;
113
align->comp_adjustment_mode = comp_adjustment_mode;
110
align->matrix_adjust_rule = matrix_adjust_rule;
114
111
align->queryIndex = queryIndex;
115
112
align->queryStart = queryStart;
116
113
align->queryEnd = queryEnd;
128
* Recursively free all alignments in the singly linked list whose
129
* head is *palign. Set *palign to NULL.
131
* @param palign pointer to the head of a singly linked list
124
/* Documented in redo_alignment.h. */
135
126
BlastCompo_AlignmentsFree(BlastCompo_Alignment ** palign,
136
127
void (*free_context)(void*))
210
* Sort a list of Blast_Compo_Alignment objects, using s_AlignmentCmp
211
* comparison function. The mergesort sorting algorithm is used.
213
* @param *plist the list to be sorted
214
* @param hspcnt the length of the list
217
217
s_DistinctAlignmentsSort(BlastCompo_Alignment ** plist, int hspcnt)
221
219
if (COMPO_INTENSE_DEBUG) {
222
220
assert(s_DistinctAlignmentsLength(*plist) == hspcnt);
290
288
s_AlignmentCopy(const BlastCompo_Alignment * align)
292
290
return BlastCompo_AlignmentNew(align->score,
293
align->comp_adjustment_mode,
298
align->matchEnd, align->frame,
291
align->matrix_adjust_rule,
296
align->matchEnd, align->frame,
324
322
* @param p_newAlign on input the alignment that may be added to
325
323
* the list; on output NULL
326
* @param p_oldAlignment on input the existing list of alignments;
324
* @param p_oldAlignments on input the existing list of alignments;
327
325
* on output the new list
326
* @param free_align_context a routine to be used to free the context
327
* field of an alignment, if any alignment is
330
331
s_WithDistinctEnds(BlastCompo_Alignment **p_newAlign,
331
332
BlastCompo_Alignment **p_oldAlignments,
332
void free_align_tracebacks(void *))
333
void free_align_context(void *))
334
335
/* Deference the input parameters. */
335
336
BlastCompo_Alignment * newAlign = *p_newAlign;
373
374
align->matchEnd == newAlign->matchEnd))) {
374
375
/* The alignment shares an end with newAlign; */
376
BlastCompo_AlignmentsFree(&align, free_align_tracebacks);
377
BlastCompo_AlignmentsFree(&align, free_align_context);
377
378
} else { /* The alignment does not share an end with newAlign; */
378
379
/* add it to the output list. */
383
384
} /* end while align != NULL */
384
385
*p_oldAlignments = newAlign;
385
386
} else { /* do not include_new_align */
386
BlastCompo_AlignmentsFree(&newAlign, free_align_tracebacks);
387
BlastCompo_AlignmentsFree(&newAlign, free_align_context);
387
388
} /* end else do not include newAlign */
846
847
#define KAPPA_BIT_TOL 2.0
850
/** Test of whether one set of HSP bounds is contained in another */
849
851
#define KAPPA_CONTAINED_IN_HSP(a,b,c,d,e,f) \
850
852
((a <= c && b >= c) && (d <= f && e >= f))
851
#define KAPPA_SIGN(a) ((a > 0) ? 1 : ((a < 0) ? -1 : 0))
853
/** A macro that defines the mathematical "sign" function */
854
#define KAPPA_SIGN(a) (((a) > 0) ? 1 : (((a) < 0) ? -1 : 0))
853
856
* Return true if an alignment is contained in a previously-computed
854
857
* alignment of sufficiently high score.
906
/** Create new Blast_RedoAlignParams object. The parameters of this
907
* function correspond directly to the fields of
908
* Blast_RedoAlignParams. The new Blast_RedoAlignParams object takes
909
* possession of *pmatrix_info and *pgapping_params, so these values
910
* are set to NULL on exit. */
910
/* Documented in redo_alignment.h. */
911
911
Blast_RedoAlignParams *
912
912
Blast_RedoAlignParamsNew(Blast_MatrixInfo ** pmatrix_info,
913
913
BlastCompo_GappingParams ** pgapping_params,
914
int adjustParameters, int positionBased,
914
ECompoAdjustModes compo_adjust_mode,
915
916
int subject_is_translated,
916
917
int ccat_query_length, int cutoff_s,
917
double cutoff_e, int do_link_hsps, double Lambda,
918
double cutoff_e, int do_link_hsps,
919
919
const Blast_RedoAlignCallbacks * callbacks)
921
921
Blast_RedoAlignParams * params = malloc(sizeof(Blast_RedoAlignParams));
925
925
params->gapping_params = *pgapping_params;
926
926
*pgapping_params = NULL;
928
params->adjustParameters = adjustParameters;
928
params->compo_adjust_mode = compo_adjust_mode;
929
929
params->positionBased = positionBased;
930
930
params->RE_pseudocounts = kReMatrixAdjustmentPseudocounts;
931
931
params->subject_is_translated = subject_is_translated;
948
* Recompute all alignments for one query/subject pair using
949
* composition-based statistics or composition-based matrix adjustment.
951
* @param alignments an array of lists containing the newly
952
* computed alignments. There is one array
953
* element for each query in the original
955
* @param params parameters used to redo the alignments
956
* @param incoming_aligns a list of existing alignments
957
* @param hspcnt length of incoming_aligns
958
* @param matchingSeq the database sequence
959
* @param ccat_query_length the length of the concatenated query
960
* @param query information about all queries
961
* @param numQueries the number of queries
962
* @param matrix the scoring matrix
963
* @param NRrecord a workspace used to adjust the composition.
965
* @return 0 on success, -1 on out-of-memory
945
/* Documented in redo_alignment.h. */
968
947
Blast_RedoOneMatch(BlastCompo_Alignment ** alignments,
969
948
Blast_RedoAlignParams * params,
970
949
BlastCompo_Alignment * incoming_aligns, int hspcnt,
971
951
BlastCompo_MatchingSequence * matchingSeq,
972
952
int ccat_query_length, BlastCompo_QueryInfo query_info[],
973
953
int numQueries, int ** matrix,
974
Blast_CompositionWorkspace * NRrecord)
954
Blast_CompositionWorkspace * NRrecord,
955
double *pvalueForThisPair,
956
int compositionTestIndex,
976
959
int status = 0; /* return status */
977
960
s_WindowInfo **windows; /* array of windows */
979
962
int window_index; /* loop index */
980
963
int query_index; /* index of the current query */
981
964
/* which mode of composition adjustment is actually used? */
982
ECompoAdjustModes whichMode = eNoCompositionAdjustment;
965
EMatrixAdjustRule matrix_adjust_rule = eDontAdjustMatrix;
984
967
/* fields of params, as local variables */
985
968
Blast_MatrixInfo * scaledMatrixInfo = params->matrix_info;
986
int adjustParameters = params->adjustParameters;
987
int positionBased = params->positionBased;
988
int RE_rule = params->adjustParameters - 1;
969
ECompoAdjustModes compo_adjust_mode = params->compo_adjust_mode;
989
970
int RE_pseudocounts = params->RE_pseudocounts;
990
971
int subject_is_translated = params->subject_is_translated;
991
double Lambda = params->Lambda;
992
972
BlastCompo_GappingParams * gapping_params = params->gapping_params;
993
973
const Blast_RedoAlignCallbacks * callbacks = params->callbacks;
995
assert(adjustParameters < 2 || !positionBased);
975
assert((int) compo_adjust_mode < 2 || !params->positionBased);
996
976
for (query_index = 0; query_index < numQueries; query_index++) {
997
977
alignments[query_index] = NULL;
1035
1015
/* adjust_search_failed is true only if Blast_AdjustScores
1036
1016
* is called and returns a nonzero value */
1037
1017
int adjust_search_failed = 0;
1038
if (adjustParameters &&
1018
if (compo_adjust_mode != eNoCompositionBasedStats &&
1039
1019
(subject_is_translated || hsp_index == 0)) {
1040
1020
Blast_AminoAcidComposition subject_composition;
1041
1021
s_GetSubjectComposition(&subject_composition,
1048
1028
&subject_composition,
1049
1029
subject.length,
1050
scaledMatrixInfo, RE_rule,
1030
scaledMatrixInfo, compo_adjust_mode,
1051
1031
RE_pseudocounts, NRrecord,
1053
callbacks->calc_lambda);
1032
&matrix_adjust_rule,
1033
callbacks->calc_lambda,
1035
compositionTestIndex,
1054
1037
if (adjust_search_failed < 0) { /* fatal error */
1055
1038
status = adjust_search_failed;
1056
1039
goto window_index_loop_cleanup;
1059
1042
if ( !adjust_search_failed ) {
1062
redo_one_alignment(in_align, whichMode,
1045
redo_one_alignment(in_align, matrix_adjust_rule,
1063
1046
query, &window->query_range,
1064
1047
ccat_query_length,
1065
1048
&subject, &window->subject_range,
1066
1049
matchingSeq->length,
1067
1050
gapping_params);
1068
s_WithDistinctEnds(&newAlign, &alignments[query_index],
1069
callbacks->free_align_traceback);
1051
if (newAlign->score >= params->cutoff_s) {
1052
s_WithDistinctEnds(&newAlign, &alignments[query_index],
1053
callbacks->free_align_traceback);
1055
BlastCompo_AlignmentsFree(&newAlign,
1057
free_align_traceback);
1071
1060
} /* end if in_align is not contained...*/
1072
1061
} /* end for all alignments in this window */
1096
* Recompute all alignments for one query/subject pair using the
1097
* Smith-Waterman algorithm and possibly also composition-based
1098
* statistics or composition-based matrix adjustment.
1100
* @param alignments an array of lists containing the newly
1101
* computed alignments. There is one array
1102
* element for each query in the original
1104
* @param params parameters used to redo the alignments
1105
* @param incoming_aligns a list of existing alignments
1106
* @param hspcnt length of incoming_aligns
1107
* @param matchingSeq the database sequence
1108
* @param query information about all queries
1109
* @param numQueries the number of queries
1110
* @param matrix the scoring matrix
1111
* @param NRrecord a workspace used to adjust the composition.
1112
* @param forbidden a workspace used to hold forbidden ranges
1113
* for the Smith-Waterman algorithm.
1114
* @param significantMatches an array of heaps of alignments for
1115
* query-subject pairs that have already
1116
* been redone; used to terminate the
1117
* Smith-Waterman algorithm early if it is
1118
* clear that the current match is not
1119
* significant enough to be saved.
1121
* @return 0 on success, -1 on out-of-memory
1084
/* Documented in redo_alignment.h. */
1124
1086
Blast_RedoOneMatchSmithWaterman(BlastCompo_Alignment ** alignments,
1125
1087
Blast_RedoAlignParams * params,
1126
1088
BlastCompo_Alignment * incoming_aligns,
1090
double Lambda, double logK,
1128
1091
BlastCompo_MatchingSequence * matchingSeq,
1129
1092
BlastCompo_QueryInfo query_info[],
1130
1093
int numQueries,
1132
1095
Blast_CompositionWorkspace * NRrecord,
1133
1096
Blast_ForbiddenRanges * forbidden,
1134
BlastCompo_Heap * significantMatches)
1097
BlastCompo_Heap * significantMatches,
1098
double *pvalueForThisPair,
1099
int compositionTestIndex,
1100
double *LambdaRatio)
1136
1102
int status = 0; /* status return value */
1137
1103
s_WindowInfo **windows = NULL; /* array of windows */
1139
1105
int window_index; /* loop index */
1140
1106
int query_index; /* index of the current query */
1141
1107
/* which mode of composition adjustment is actually used? */
1142
ECompoAdjustModes whichMode = eNoCompositionAdjustment;
1108
EMatrixAdjustRule matrix_adjust_rule = eDontAdjustMatrix;
1144
1110
/* fields of params, as local variables */
1145
1111
Blast_MatrixInfo * scaledMatrixInfo = params->matrix_info;
1146
int adjustParameters = params->adjustParameters;
1112
ECompoAdjustModes compo_adjust_mode = params->compo_adjust_mode;
1147
1113
int positionBased = params->positionBased;
1148
int RE_rule = params->adjustParameters - 1;
1149
1114
int RE_pseudocounts = params->RE_pseudocounts;
1150
1115
int subject_is_translated = params->subject_is_translated;
1151
1116
int do_link_hsps = params->do_link_hsps;
1152
1117
int ccat_query_length = params->ccat_query_length;
1153
1118
BlastCompo_GappingParams * gapping_params = params->gapping_params;
1154
double Lambda = params->Lambda;
1155
double logK = params->logK;
1156
1119
const Blast_RedoAlignCallbacks * callbacks = params->callbacks;
1158
1121
int gap_open = gapping_params->gap_open;
1159
1122
int gap_extend = gapping_params->gap_extend;
1161
assert(adjustParameters < 2 || !positionBased);
1124
assert((int) compo_adjust_mode < 2 || !positionBased);
1162
1125
for (query_index = 0; query_index < numQueries; query_index++) {
1163
1126
alignments[query_index] = NULL;
1198
1161
/* For Smith-Waterman alignments, adjust the search using the
1199
1162
* composition of the highest scoring alignment in window */
1200
if (adjustParameters) {
1163
if (compo_adjust_mode != eNoCompositionBasedStats) {
1201
1164
Blast_AminoAcidComposition subject_composition;
1202
1165
s_GetSubjectComposition(&subject_composition,
1203
1166
&subject, &window->subject_range,
1205
1168
adjust_search_failed =
1206
1169
Blast_AdjustScores(matrix,
1207
1170
query_composition, query->length,
1208
&subject_composition, subject.length,
1210
RE_rule, RE_pseudocounts, NRrecord,
1211
&whichMode, callbacks->calc_lambda);
1171
&subject_composition, subject.length,
1172
scaledMatrixInfo, compo_adjust_mode,
1173
RE_pseudocounts, NRrecord,
1174
&matrix_adjust_rule, callbacks->calc_lambda,
1176
compositionTestIndex,
1212
1178
if (adjust_search_failed < 0) { /* fatal error */
1213
1179
status = adjust_search_failed;
1214
1180
goto window_index_loop_cleanup;
1293
1259
ccat_query_length,
1294
1260
&subject, &window->subject_range,
1295
1261
matchingSeq->length,
1296
gapping_params, whichMode);
1262
gapping_params, matrix_adjust_rule);
1297
1263
if (status != 0) {
1298
1264
goto window_index_loop_cleanup;
1343
/** Return true if a heuristic determines that it is unlikely to be
1344
* worthwhile to redo a query-subject pair with the given evalue; used
1345
* to terminate the main loop for redoing all alignments early. */
1309
/* Documented in redo_alignment.h. */
1347
1311
BlastCompo_EarlyTermination(double evalue,
1348
1312
BlastCompo_Heap significantMatches[],