~ubuntu-branches/ubuntu/feisty/ncbi-tools6/feisty

« back to all changes in this revision

Viewing changes to algo/blast/composition_adjustment/redo_alignment.c

  • Committer: Bazaar Package Importer
  • Author(s): Barry deFreese
  • Date: 2006-07-19 23:28:07 UTC
  • mfrom: (1.1.5 upstream)
  • Revision ID: james.westby@ubuntu.com-20060719232807-et3cdmcjgmnyleyx
Tags: 6.1.20060507-3ubuntu1
Re-merge with Debian

Show diffs side-by-side

added added

removed removed

Lines of Context:
22
22
*
23
23
* ===========================================================================*/
24
24
 
25
 
/** @file kappa_common.c
26
 
 *
27
 
 * @author Alejandro Schaffer, E. Michael Gertz
28
 
 *
 
25
/** @file redo_alignment.c
29
26
 * Routines for redoing a set of alignments, using either
30
27
 * composition matrix adjustment or the Smith-Waterman algorithm (or
31
28
 * both.)
 
29
 *
 
30
 * @author Alejandro Schaffer, E. Michael Gertz
32
31
 */
33
32
#ifndef SKIP_DOXYGEN_PROCESSING
34
33
static char const rcsid[] =
35
 
    "$Id: redo_alignment.c,v 1.2 2005/12/01 15:41:42 gertz Exp $";
 
34
    "$Id: redo_alignment.c,v 1.9 2006/05/03 14:10:39 gertz Exp $";
36
35
#endif /* SKIP_DOXYGEN_PROCESSING */
37
36
 
38
37
#include <stdlib.h>
46
45
#include <algo/blast/composition_adjustment/smith_waterman.h>
47
46
#include <algo/blast/composition_adjustment/compo_heap.h>
48
47
 
49
 
/* The natural log of 2, defined in newer systems as M_LN2 in math.h, but
50
 
   missing in older systems. */
 
48
/** The natural log of 2, defined in newer systems as M_LN2 in math.h, but
 
49
    missing in older systems. */
51
50
#define LOCAL_LN2 0.69314718055994530941723212145818
52
51
 
53
52
/** Define COMPO_INTENSE_DEBUG to be true to turn on rigorous but
65
64
#define COMPO_INTENSE_DEBUG 0
66
65
#endif
67
66
 
68
 
/** by what factor might initially reported E-value exceed true Evalue */
 
67
/** by what factor might initially reported E-value exceed true E-value */
69
68
#define EVALUE_STRETCH 5
70
69
 
71
70
/** -1/0/1 if a is less than/greater than/equal to b */
97
96
} s_WindowInfo;
98
97
 
99
98
 
100
 
/**
101
 
 * Create a new BlastCompo_Alignment; parameters to this function
102
 
 * correspond directly to fields of BlastCompo_Alignment */
 
99
/* Documented in redo_alignment.h. */
103
100
BlastCompo_Alignment *
104
101
BlastCompo_AlignmentNew(int score,
105
 
                           ECompoAdjustModes comp_adjustment_mode,
106
 
                           int queryStart, int queryEnd, int queryIndex,
107
 
                           int matchStart, int matchEnd, int frame,
108
 
                           void * context)
 
102
                        EMatrixAdjustRule matrix_adjust_rule,
 
103
                        int queryStart, int queryEnd, int queryIndex,
 
104
                        int matchStart, int matchEnd, int frame,
 
105
                        void * context)
109
106
{
110
107
    BlastCompo_Alignment * align = malloc(sizeof(BlastCompo_Alignment));
111
108
    if (align != NULL) {
112
109
        align->score = score;
113
 
        align->comp_adjustment_mode = comp_adjustment_mode;
 
110
        align->matrix_adjust_rule = matrix_adjust_rule;
114
111
        align->queryIndex = queryIndex;
115
112
        align->queryStart = queryStart;
116
113
        align->queryEnd = queryEnd;
124
121
}
125
122
 
126
123
 
127
 
/**
128
 
 * Recursively free all alignments in the singly linked list whose
129
 
 * head is *palign. Set *palign to NULL.
130
 
 *
131
 
 * @param palign            pointer to the head of a singly linked list
132
 
 *                          of alignments.
133
 
 */
 
124
/* Documented in redo_alignment.h. */
134
125
void
135
126
BlastCompo_AlignmentsFree(BlastCompo_Alignment ** palign,
136
127
                             void (*free_context)(void*))
202
193
}
203
194
 
204
195
 
 
196
/** Calculate the length of a list of BlastCompo_Alignment objects.
 
197
 *  This is an O(n) operation */
205
198
static int
206
199
s_DistinctAlignmentsLength(BlastCompo_Alignment * list) 
207
200
{
213
206
}
214
207
 
215
208
 
 
209
/**
 
210
 * Sort a list of Blast_Compo_Alignment objects, using s_AlignmentCmp 
 
211
 * comparison function.  The mergesort sorting algorithm is used.
 
212
 *
 
213
 * @param *plist        the list to be sorted
 
214
 * @param hspcnt        the length of the list
 
215
 */
216
216
static void
217
217
s_DistinctAlignmentsSort(BlastCompo_Alignment ** plist, int hspcnt)
218
218
{
219
 
    /* mergesort */
220
 
 
221
219
    if (COMPO_INTENSE_DEBUG) {
222
220
        assert(s_DistinctAlignmentsLength(*plist) == hspcnt);
223
221
    }
290
288
s_AlignmentCopy(const BlastCompo_Alignment * align)
291
289
{
292
290
    return BlastCompo_AlignmentNew(align->score,
293
 
                                      align->comp_adjustment_mode,
294
 
                                      align->queryStart,
295
 
                                      align->queryEnd,
296
 
                                      align->queryIndex,
297
 
                                      align->matchStart,
298
 
                                      align->matchEnd, align->frame,
299
 
                                      align->context);
 
291
                                   align->matrix_adjust_rule,
 
292
                                   align->queryStart,
 
293
                                   align->queryEnd,
 
294
                                   align->queryIndex,
 
295
                                   align->matchStart,
 
296
                                   align->matchEnd, align->frame,
 
297
                                   align->context);
300
298
    
301
299
}
302
300
 
323
321
 *
324
322
 * @param p_newAlign        on input the alignment that may be added to
325
323
 *                          the list; on output NULL
326
 
 * @param p_oldAlignment    on input the existing list of alignments;
 
324
 * @param p_oldAlignments   on input the existing list of alignments;
327
325
 *                          on output the new list
 
326
 * @param free_align_context    a routine to be used to free the context 
 
327
 *                              field of an alignment, if any alignment is
 
328
 *                              freed; may be NULL
328
329
 */
329
330
static void
330
331
s_WithDistinctEnds(BlastCompo_Alignment **p_newAlign,
331
332
                   BlastCompo_Alignment **p_oldAlignments,
332
 
                   void free_align_tracebacks(void *))
 
333
                   void free_align_context(void *))
333
334
{
334
335
    /* Deference the input parameters. */
335
336
    BlastCompo_Alignment * newAlign      = *p_newAlign;
373
374
                     align->matchEnd == newAlign->matchEnd))) {
374
375
                /* The alignment shares an end with newAlign; */
375
376
                /* delete it. */
376
 
                BlastCompo_AlignmentsFree(&align, free_align_tracebacks);
 
377
                BlastCompo_AlignmentsFree(&align, free_align_context);
377
378
            } else { /* The alignment does not share an end with newAlign; */
378
379
                /* add it to the output list. */
379
380
                *tail =  align;
383
384
        } /* end while align != NULL */
384
385
        *p_oldAlignments = newAlign;
385
386
    } else { /* do not include_new_align */
386
 
        BlastCompo_AlignmentsFree(&newAlign, free_align_tracebacks);
 
387
        BlastCompo_AlignmentsFree(&newAlign, free_align_context);
387
388
    } /* end else do not include newAlign */
388
389
}
389
390
 
829
830
 
830
831
 
831
832
/**
832
 
 * Compute an evalue from a score and a set of statistical parameters
 
833
 * Compute an e-value from a score and a set of statistical parameters
833
834
 */
834
835
static double
835
836
s_EvalueFromScore(int score, double Lambda, double logK, double searchsp)
846
847
#define KAPPA_BIT_TOL 2.0
847
848
 
848
849
 
 
850
/** Test of whether one set of HSP bounds is contained in another */
849
851
#define KAPPA_CONTAINED_IN_HSP(a,b,c,d,e,f) \
850
852
((a <= c && b >= c) && (d <= f && e >= f))
851
 
#define KAPPA_SIGN(a) ((a > 0) ? 1 : ((a < 0) ? -1 : 0))
 
853
/** A macro that defines the mathematical "sign" function */
 
854
#define KAPPA_SIGN(a) (((a) > 0) ? 1 : (((a) < 0) ? -1 : 0))
852
855
/**
853
856
 * Return true if an alignment is contained in a previously-computed
854
857
 * alignment of sufficiently high score.
891
894
}
892
895
 
893
896
 
894
 
/** Free a set of Blast_RedoAlignParams */
 
897
/* Documented in redo_alignment.h. */
895
898
void
896
899
Blast_RedoAlignParamsFree(Blast_RedoAlignParams ** pparams)
897
900
{
903
906
    }
904
907
}
905
908
 
906
 
/** Create new Blast_RedoAlignParams object.  The parameters of this
907
 
 * function correspond directly to the fields of
908
 
 * Blast_RedoAlignParams.  The new Blast_RedoAlignParams object takes
909
 
 * possession of *pmatrix_info and *pgapping_params, so these values
910
 
 * are set to NULL on exit. */
 
909
 
 
910
/* Documented in redo_alignment.h. */
911
911
Blast_RedoAlignParams *
912
912
Blast_RedoAlignParamsNew(Blast_MatrixInfo ** pmatrix_info,
913
913
                         BlastCompo_GappingParams ** pgapping_params,
914
 
                         int adjustParameters, int positionBased,
 
914
                         ECompoAdjustModes compo_adjust_mode,
 
915
                         int positionBased,
915
916
                         int subject_is_translated,
916
917
                         int ccat_query_length, int cutoff_s,
917
 
                         double cutoff_e, int do_link_hsps, double Lambda,
918
 
                         double logK,
 
918
                         double cutoff_e, int do_link_hsps,
919
919
                         const Blast_RedoAlignCallbacks * callbacks)
920
920
{
921
921
    Blast_RedoAlignParams * params = malloc(sizeof(Blast_RedoAlignParams));
925
925
        params->gapping_params = *pgapping_params;
926
926
        *pgapping_params = NULL;
927
927
 
928
 
        params->adjustParameters = adjustParameters;
 
928
        params->compo_adjust_mode = compo_adjust_mode;
929
929
        params->positionBased = positionBased;
930
930
        params->RE_pseudocounts = kReMatrixAdjustmentPseudocounts;
931
931
        params->subject_is_translated = subject_is_translated;
933
933
        params->cutoff_s = cutoff_s;
934
934
        params->cutoff_e = cutoff_e;
935
935
        params->do_link_hsps = do_link_hsps;
936
 
        params->Lambda = Lambda;
937
 
        params->logK = logK;
938
936
        params->callbacks = callbacks;
939
937
    } else {
940
938
        free(*pmatrix_info); *pmatrix_info = NULL;
944
942
}
945
943
 
946
944
 
947
 
/**
948
 
 * Recompute all alignments for one query/subject pair using
949
 
 * composition-based statistics or composition-based matrix adjustment.
950
 
 *
951
 
 * @param alignments       an array of lists containing the newly
952
 
 *                         computed alignments.  There is one array
953
 
 *                         element for each query in the original
954
 
 *                         search
955
 
 * @param params           parameters used to redo the alignments
956
 
 * @param incoming_aligns  a list of existing alignments
957
 
 * @param hspcnt           length of incoming_aligns
958
 
 * @param matchingSeq      the database sequence
959
 
 * @param ccat_query_length  the length of the concatenated query
960
 
 * @param query            information about all queries
961
 
 * @param numQueries       the number of queries
962
 
 * @param matrix           the scoring matrix
963
 
 * @param NRrecord         a workspace used to adjust the composition.
964
 
 *
965
 
 * @return 0 on success, -1 on out-of-memory
966
 
 */
 
945
/* Documented in redo_alignment.h. */
967
946
int
968
947
Blast_RedoOneMatch(BlastCompo_Alignment ** alignments,
969
948
                   Blast_RedoAlignParams * params,
970
949
                   BlastCompo_Alignment * incoming_aligns, int hspcnt,
 
950
                   double Lambda,
971
951
                   BlastCompo_MatchingSequence * matchingSeq,
972
952
                   int ccat_query_length, BlastCompo_QueryInfo query_info[],
973
953
                   int numQueries, int ** matrix,
974
 
                   Blast_CompositionWorkspace * NRrecord)
 
954
                   Blast_CompositionWorkspace * NRrecord,
 
955
                   double *pvalueForThisPair,
 
956
                   int compositionTestIndex,
 
957
                   double *LambdaRatio)
975
958
{
976
959
    int status = 0;                  /* return status */
977
960
    s_WindowInfo **windows;      /* array of windows */
979
962
    int window_index;                /* loop index */
980
963
    int query_index;                 /* index of the current query */
981
964
    /* which mode of composition adjustment is actually used? */
982
 
    ECompoAdjustModes whichMode = eNoCompositionAdjustment;
 
965
    EMatrixAdjustRule matrix_adjust_rule = eDontAdjustMatrix;
983
966
 
984
967
    /* fields of params, as local variables */
985
968
    Blast_MatrixInfo * scaledMatrixInfo = params->matrix_info;
986
 
    int adjustParameters = params->adjustParameters;
987
 
    int positionBased = params->positionBased;
988
 
    int RE_rule = params->adjustParameters - 1;
 
969
    ECompoAdjustModes compo_adjust_mode = params->compo_adjust_mode;
989
970
    int RE_pseudocounts = params->RE_pseudocounts;
990
971
    int subject_is_translated = params->subject_is_translated;
991
 
    double Lambda = params->Lambda;
992
972
    BlastCompo_GappingParams * gapping_params = params->gapping_params;
993
973
    const Blast_RedoAlignCallbacks * callbacks = params->callbacks;
994
974
 
995
 
    assert(adjustParameters < 2 || !positionBased);
 
975
    assert((int) compo_adjust_mode < 2 || !params->positionBased);
996
976
    for (query_index = 0;  query_index < numQueries;  query_index++) {
997
977
        alignments[query_index] = NULL;
998
978
    }
1035
1015
                /* adjust_search_failed is true only if Blast_AdjustScores
1036
1016
                 * is called and returns a nonzero value */
1037
1017
                int adjust_search_failed = 0;
1038
 
                if (adjustParameters &&
 
1018
                if (compo_adjust_mode != eNoCompositionBasedStats &&
1039
1019
                    (subject_is_translated || hsp_index == 0)) {
1040
1020
                    Blast_AminoAcidComposition subject_composition;
1041
1021
                    s_GetSubjectComposition(&subject_composition,
1047
1027
                                           query->length,
1048
1028
                                           &subject_composition,
1049
1029
                                           subject.length,
1050
 
                                           scaledMatrixInfo, RE_rule,
 
1030
                                           scaledMatrixInfo, compo_adjust_mode,
1051
1031
                                           RE_pseudocounts, NRrecord,
1052
 
                                           &whichMode,
1053
 
                                           callbacks->calc_lambda);
 
1032
                                           &matrix_adjust_rule,
 
1033
                                           callbacks->calc_lambda,
 
1034
                                           pvalueForThisPair,
 
1035
                                           compositionTestIndex,
 
1036
                                           LambdaRatio);
1054
1037
                    if (adjust_search_failed < 0) { /* fatal error */
1055
1038
                        status = adjust_search_failed;
1056
1039
                        goto window_index_loop_cleanup;
1059
1042
                if ( !adjust_search_failed ) {
1060
1043
                    newAlign =
1061
1044
                        callbacks->
1062
 
                        redo_one_alignment(in_align, whichMode,
 
1045
                        redo_one_alignment(in_align, matrix_adjust_rule,
1063
1046
                                           query, &window->query_range,
1064
1047
                                           ccat_query_length,
1065
1048
                                           &subject, &window->subject_range,
1066
1049
                                           matchingSeq->length,
1067
1050
                                           gapping_params);
1068
 
                    s_WithDistinctEnds(&newAlign, &alignments[query_index],
1069
 
                                       callbacks->free_align_traceback);
 
1051
                    if (newAlign->score >= params->cutoff_s) {
 
1052
                        s_WithDistinctEnds(&newAlign, &alignments[query_index],
 
1053
                                           callbacks->free_align_traceback);
 
1054
                    } else {
 
1055
                        BlastCompo_AlignmentsFree(&newAlign,
 
1056
                                                  callbacks->
 
1057
                                                  free_align_traceback);
 
1058
                    }
1070
1059
                }
1071
1060
            } /* end if in_align is not contained...*/
1072
1061
        } /* end for all alignments in this window */
1092
1081
}
1093
1082
 
1094
1083
 
1095
 
/**
1096
 
 * Recompute all alignments for one query/subject pair using the
1097
 
 * Smith-Waterman algorithm and possibly also composition-based
1098
 
 * statistics or composition-based matrix adjustment.
1099
 
 *
1100
 
 * @param alignments       an array of lists containing the newly
1101
 
 *                         computed alignments.  There is one array
1102
 
 *                         element for each query in the original
1103
 
 *                         search
1104
 
 * @param params           parameters used to redo the alignments
1105
 
 * @param incoming_aligns  a list of existing alignments
1106
 
 * @param hspcnt           length of incoming_aligns
1107
 
 * @param matchingSeq      the database sequence
1108
 
 * @param query            information about all queries
1109
 
 * @param numQueries       the number of queries
1110
 
 * @param matrix           the scoring matrix
1111
 
 * @param NRrecord         a workspace used to adjust the composition.
1112
 
 * @param forbidden        a workspace used to hold forbidden ranges
1113
 
 *                         for the Smith-Waterman algorithm.
1114
 
 * @param significantMatches   an array of heaps of alignments for
1115
 
 *                             query-subject pairs that have already
1116
 
 *                             been redone; used to terminate the
1117
 
 *                             Smith-Waterman algorithm early if it is
1118
 
 *                             clear that the current match is not
1119
 
 *                             significant enough to be saved.
1120
 
 *
1121
 
 * @return 0 on success, -1 on out-of-memory
1122
 
 */
 
1084
/* Documented in redo_alignment.h. */
1123
1085
int
1124
1086
Blast_RedoOneMatchSmithWaterman(BlastCompo_Alignment ** alignments,
1125
1087
                                Blast_RedoAlignParams * params,
1126
1088
                                BlastCompo_Alignment * incoming_aligns,
1127
1089
                                int hspcnt,
 
1090
                                double Lambda, double logK,
1128
1091
                                BlastCompo_MatchingSequence * matchingSeq,
1129
1092
                                BlastCompo_QueryInfo query_info[],
1130
1093
                                int numQueries,
1131
1094
                                int ** matrix,
1132
1095
                                Blast_CompositionWorkspace * NRrecord,
1133
1096
                                Blast_ForbiddenRanges * forbidden,
1134
 
                                BlastCompo_Heap * significantMatches)
 
1097
                                BlastCompo_Heap * significantMatches,
 
1098
                                double *pvalueForThisPair,
 
1099
                                int compositionTestIndex,
 
1100
                                double *LambdaRatio)
1135
1101
{
1136
1102
    int status = 0;                     /* status return value */
1137
1103
    s_WindowInfo **windows = NULL;  /* array of windows */
1139
1105
    int window_index;                   /* loop index */
1140
1106
    int query_index;                    /* index of the current query */
1141
1107
    /* which mode of composition adjustment is actually used? */
1142
 
    ECompoAdjustModes whichMode = eNoCompositionAdjustment;
 
1108
    EMatrixAdjustRule matrix_adjust_rule = eDontAdjustMatrix;
1143
1109
 
1144
1110
    /* fields of params, as local variables */
1145
1111
    Blast_MatrixInfo * scaledMatrixInfo = params->matrix_info;
1146
 
    int adjustParameters = params->adjustParameters;
 
1112
    ECompoAdjustModes compo_adjust_mode = params->compo_adjust_mode;
1147
1113
    int positionBased = params->positionBased;
1148
 
    int RE_rule = params->adjustParameters - 1;
1149
1114
    int RE_pseudocounts = params->RE_pseudocounts;
1150
1115
    int subject_is_translated = params->subject_is_translated;
1151
1116
    int do_link_hsps = params->do_link_hsps;
1152
1117
    int ccat_query_length = params->ccat_query_length;
1153
1118
    BlastCompo_GappingParams * gapping_params = params->gapping_params;
1154
 
    double Lambda = params->Lambda;
1155
 
    double logK = params->logK;
1156
1119
    const Blast_RedoAlignCallbacks * callbacks = params->callbacks;
1157
1120
 
1158
1121
    int gap_open = gapping_params->gap_open;
1159
1122
    int gap_extend = gapping_params->gap_extend;
1160
1123
 
1161
 
    assert(adjustParameters < 2 || !positionBased);
 
1124
    assert((int) compo_adjust_mode < 2 || !positionBased);
1162
1125
    for (query_index = 0;  query_index < numQueries;  query_index++) {
1163
1126
        alignments[query_index] = NULL;
1164
1127
    }
1197
1160
            
1198
1161
        /* For Smith-Waterman alignments, adjust the search using the
1199
1162
         * composition of the highest scoring alignment in window */
1200
 
        if (adjustParameters) {
 
1163
        if (compo_adjust_mode != eNoCompositionBasedStats) {
1201
1164
            Blast_AminoAcidComposition subject_composition;
1202
1165
            s_GetSubjectComposition(&subject_composition,
1203
1166
                                        &subject, &window->subject_range,
1205
1168
            adjust_search_failed =
1206
1169
                Blast_AdjustScores(matrix,
1207
1170
                                   query_composition, query->length,
1208
 
                                   &subject_composition, subject.length, 
1209
 
                                   scaledMatrixInfo,
1210
 
                                   RE_rule, RE_pseudocounts, NRrecord,
1211
 
                                   &whichMode, callbacks->calc_lambda);
 
1171
                                   &subject_composition, subject.length,
 
1172
                                   scaledMatrixInfo, compo_adjust_mode,
 
1173
                                   RE_pseudocounts, NRrecord,
 
1174
                                   &matrix_adjust_rule, callbacks->calc_lambda,
 
1175
                                   pvalueForThisPair,
 
1176
                                   compositionTestIndex,
 
1177
                                   LambdaRatio);
1212
1178
            if (adjust_search_failed < 0) { /* fatal error */
1213
1179
                status = adjust_search_failed;
1214
1180
                goto window_index_loop_cleanup;
1293
1259
                                        ccat_query_length,
1294
1260
                                        &subject, &window->subject_range,
1295
1261
                                        matchingSeq->length,
1296
 
                                        gapping_params, whichMode);
 
1262
                                        gapping_params, matrix_adjust_rule);
1297
1263
                    if (status != 0) {
1298
1264
                        goto window_index_loop_cleanup;
1299
1265
                    }
1340
1306
}
1341
1307
 
1342
1308
 
1343
 
/** Return true if a heuristic determines that it is unlikely to be
1344
 
 * worthwhile to redo a query-subject pair with the given evalue; used
1345
 
 * to terminate the main loop for redoing all alignments early. */
 
1309
/* Documented in redo_alignment.h. */
1346
1310
int
1347
1311
BlastCompo_EarlyTermination(double evalue,
1348
1312
                            BlastCompo_Heap significantMatches[],