1
static char const rcsid[] =
2
"$Id: blast_psi.c,v 1.14 2004/10/18 14:33:14 camacho Exp $";
3
/* ===========================================================================
6
* National Center for Biotechnology Information
8
* This software/database is a "United States Government Work" under the
9
* terms of the United States Copyright Act. It was written as part of
10
* the author's offical duties as a United States Government employee and
11
* thus cannot be copyrighted. This software/database is freely available
12
* to the public for use. The National Library of Medicine and the U.S.
13
* Government have not placed any restriction on its use or reproduction.
15
* Although all reasonable efforts have been taken to ensure the accuracy
16
* and reliability of the software and data, the NLM and the U.S.
17
* Government do not and cannot warrant the performance or results that
18
* may be obtained by using this software or data. The NLM and the U.S.
19
* Government disclaim all warranties, express or implied, including
20
* warranties of performance, merchantability or fitness for any particular
23
* Please cite the author in any work or product based on this material.
25
* ===========================================================================
27
* Author: Christiam Camacho
32
* Implementation of the high level functions of PSI-BLAST's PSSM engine.
35
#include <algo/blast/core/blast_stat.h>
36
#include <algo/blast/core/blast_encoding.h>
37
#include "blast_psi_priv.h"
39
/****************************************************************************/
40
/* Function prototypes */
42
/** Convenience function to deallocate data structures allocated in
43
* PSICreatePssmWithDiagnostics.
46
_PSICreatePssmCleanUp(PSIMatrix** pssm,
48
_PSIAlignedBlock* aligned_block,
49
_PSISequenceWeights* seq_weights,
50
_PSIInternalPssmData* internal_pssm);
52
/** Copies pssm data from internal_pssm and sbp into pssm. None of its
53
* parameters can be NULL. */
55
_PSISavePssm(const _PSIInternalPssmData* internal_pssm,
56
const BlastScoreBlk* sbp,
59
/****************************************************************************/
62
PSICreatePssm(const PSIMsa* msap,
63
const PSIBlastOptions* options,
67
return PSICreatePssmWithDiagnostics(msap, options, sbp, NULL,
72
PSICreatePssmWithDiagnostics(const PSIMsa* msap, /* [in] */
73
const PSIBlastOptions* options, /* [in] */
74
BlastScoreBlk* sbp, /* [in] */
75
const PSIDiagnosticsRequest* request, /* [in] */
76
PSIMatrix** pssm, /* [out] */
77
PSIDiagnosticsResponse** diagnostics) /* [out] */
80
_PSIAlignedBlock* aligned_block = NULL;
81
_PSISequenceWeights* seq_weights = NULL;
82
_PSIInternalPssmData* internal_pssm = NULL;
85
if ( !msap || !options || !sbp || !pssm ) {
86
return PSIERR_BADPARAM;
89
/*** Allocate data structures ***/
90
msa = _PSIMsaNew(msap, (Uint4) sbp->alphabet_size);
91
aligned_block = _PSIAlignedBlockNew(msa->dimensions->query_length);
92
seq_weights = _PSISequenceWeightsNew(msa->dimensions, sbp);
93
internal_pssm = _PSIInternalPssmDataNew(msa->dimensions->query_length,
94
(Uint4) sbp->alphabet_size);
95
*pssm = PSIMatrixNew(msa->dimensions->query_length,
96
(Uint4) sbp->alphabet_size);
97
if ( !msa || ! aligned_block || !seq_weights || !internal_pssm || !*pssm ) {
98
_PSICreatePssmCleanUp(pssm, msa, aligned_block, seq_weights,
100
return PSIERR_OUTOFMEM;
103
/*** Enable structure group customization if needed ***/
104
if (options->ignore_consensus) {
105
_PSIPurgeAlignedRegion(msa, kQueryIndex, 0,
106
msa->dimensions->query_length);
109
/*** Run the engine's stages ***/
110
status = _PSIPurgeBiasedSegments(msa);
111
if (status != PSI_SUCCESS) {
112
_PSICreatePssmCleanUp(pssm, msa, aligned_block, seq_weights,
117
status = _PSIValidateMSA(msa, options->ignore_consensus);
118
if (status != PSI_SUCCESS) {
119
_PSICreatePssmCleanUp(pssm, msa, aligned_block, seq_weights,
124
status = _PSIComputeAlignmentBlocks(msa, aligned_block);
125
if (status != PSI_SUCCESS) {
126
_PSICreatePssmCleanUp(pssm, msa, aligned_block, seq_weights,
131
status = _PSIComputeSequenceWeights(msa, aligned_block,
132
options->ignore_consensus,
134
if (status != PSI_SUCCESS) {
135
_PSICreatePssmCleanUp(pssm, msa, aligned_block, seq_weights,
140
status = _PSIComputeResidueFrequencies(msa, seq_weights, sbp,
142
options->pseudo_count,
144
if (status != PSI_SUCCESS) {
145
_PSICreatePssmCleanUp(pssm, msa, aligned_block, seq_weights,
150
status = _PSIConvertResidueFreqsToPSSM(internal_pssm, msa->query, sbp,
151
seq_weights->std_prob);
152
if (status != PSI_SUCCESS) {
153
_PSICreatePssmCleanUp(pssm, msa, aligned_block, seq_weights,
158
/* FIXME: instead of NULL pass options->scaling_factor */
159
status = _PSIScaleMatrix(msa->query, msa->dimensions->query_length,
160
seq_weights->std_prob, NULL, internal_pssm, sbp);
161
if (status != PSI_SUCCESS) {
162
_PSICreatePssmCleanUp(pssm, msa, aligned_block, seq_weights,
167
/*** Save the pssm outgoing parameter ***/
168
_PSISavePssm(internal_pssm, sbp, *pssm);
170
/*** Save diagnostics if required ***/
171
if (request && diagnostics) {
172
*diagnostics = PSIDiagnosticsResponseNew(msa->dimensions->query_length,
173
(Uint4) sbp->alphabet_size,
175
if ( !*diagnostics ) {
176
/* FIXME: This could be changed to return a warning and not
177
* deallocate PSSM data */
178
_PSICreatePssmCleanUp(pssm, msa, aligned_block, seq_weights,
180
return PSIERR_OUTOFMEM;
182
status = _PSISaveDiagnostics(msa, aligned_block, seq_weights,
183
internal_pssm, *diagnostics);
184
if (status != PSI_SUCCESS) {
185
*diagnostics = PSIDiagnosticsResponseFree(*diagnostics);
186
_PSICreatePssmCleanUp(pssm, msa, aligned_block, seq_weights,
191
_PSICreatePssmCleanUp(NULL, msa, aligned_block, seq_weights, internal_pssm);
196
/****************************************************************************/
199
_PSICreatePssmCleanUp(PSIMatrix** pssm,
201
_PSIAlignedBlock* aligned_block,
202
_PSISequenceWeights* seq_weights,
203
_PSIInternalPssmData* internal_pssm)
206
*pssm = PSIMatrixFree(*pssm);
209
_PSIAlignedBlockFree(aligned_block);
210
_PSISequenceWeightsFree(seq_weights);
211
_PSIInternalPssmDataFree(internal_pssm);
215
_PSISavePssm(const _PSIInternalPssmData* internal_pssm,
216
const BlastScoreBlk* sbp,
219
ASSERT(internal_pssm);
223
_PSICopyIntMatrix(pssm->pssm, internal_pssm->pssm,
224
pssm->ncols, pssm->nrows);
226
pssm->lambda = sbp->kbp_gap_psi[0]->Lambda;
227
pssm->kappa = sbp->kbp_gap_psi[0]->K;
228
pssm->h = sbp->kbp_gap_psi[0]->H;
231
/****************************************************************************/
234
PSIMsaNew(const PSIMsaDimensions* dimensions)
236
PSIMsa* retval = NULL;
242
retval = (PSIMsa*) malloc(sizeof(PSIMsa));
244
return PSIMsaFree(retval);
247
retval->dimensions = (PSIMsaDimensions*) malloc(sizeof(PSIMsaDimensions));
248
if ( !retval->dimensions ) {
249
return PSIMsaFree(retval);
251
memcpy((void*) retval->dimensions,
253
sizeof(PSIMsaDimensions));
255
retval->data = (PSIMsaCell**) _PSIAllocateMatrix(dimensions->num_seqs + 1,
256
dimensions->query_length,
258
if ( !retval->data ) {
259
return PSIMsaFree(retval);
262
Uint4 s = 0; /* index on sequences */
263
Uint4 p = 0; /* index on positions */
265
for (s = 0; s < dimensions->num_seqs + 1; s++) {
266
for (p = 0; p < dimensions->query_length; p++) {
267
/* FIXME: change to 0 when old code is dropped */
268
retval->data[s][p].letter = (Uint1) -1;
269
retval->data[s][p].is_aligned = FALSE;
278
PSIMsaFree(PSIMsa* msa)
284
if ( msa->data && msa->dimensions ) {
285
_PSIDeallocateMatrix((void**) msa->data,
286
msa->dimensions->num_seqs + 1);
290
if ( msa->dimensions ) {
291
sfree(msa->dimensions);
300
PSIMatrixNew(Uint4 query_length, Uint4 alphabet_size)
302
PSIMatrix* retval = NULL;
304
retval = (PSIMatrix*) malloc(sizeof(PSIMatrix));
308
retval->ncols = query_length;
309
retval->nrows = alphabet_size;
311
retval->pssm = (int**) _PSIAllocateMatrix(query_length, alphabet_size,
313
if ( !(retval->pssm) ) {
314
return PSIMatrixFree(retval);
317
retval->lambda = 0.0;
325
PSIMatrixFree(PSIMatrix* matrix)
332
_PSIDeallocateMatrix((void**) matrix->pssm, matrix->ncols);
340
PSIDiagnosticsResponse*
341
PSIDiagnosticsResponseNew(Uint4 query_length, Uint4 alphabet_size,
342
const PSIDiagnosticsRequest* wants)
344
PSIDiagnosticsResponse* retval = NULL;
350
/* MUST use calloc to allocate structure because code that uses this
351
* structure assumes that non-NULL members will require to be populated */
352
retval = (PSIDiagnosticsResponse*) calloc(1,
353
sizeof(PSIDiagnosticsResponse));
358
retval->query_length = query_length;
359
retval->alphabet_size = alphabet_size;
361
if (wants->information_content) {
362
retval->information_content = (double*)
363
calloc(query_length, sizeof(double));
364
if ( !(retval->information_content) ) {
365
return PSIDiagnosticsResponseFree(retval);
369
if (wants->residue_frequencies) {
370
retval->residue_freqs = (Uint4**) _PSIAllocateMatrix(query_length,
373
if ( !(retval->residue_freqs) ) {
374
return PSIDiagnosticsResponseFree(retval);
378
if (wants->weighted_residue_frequencies) {
379
retval->weighted_residue_freqs = (double**)
380
_PSIAllocateMatrix(query_length,
383
if ( !(retval->weighted_residue_freqs) ) {
384
return PSIDiagnosticsResponseFree(retval);
388
if (wants->frequency_ratios) {
389
retval->frequency_ratios = (double**)
390
_PSIAllocateMatrix(query_length,
393
if ( !retval->frequency_ratios ) {
394
return PSIDiagnosticsResponseFree(retval);
398
if (wants->gapless_column_weights) {
399
retval->gapless_column_weights = (double*)
400
calloc(query_length, sizeof(double));
401
if ( !(retval->gapless_column_weights) ) {
402
return PSIDiagnosticsResponseFree(retval);
409
PSIDiagnosticsResponse*
410
PSIDiagnosticsResponseFree(PSIDiagnosticsResponse* diags)
415
if (diags->information_content) {
416
sfree(diags->information_content);
419
if (diags->residue_freqs) {
420
_PSIDeallocateMatrix((void**) diags->residue_freqs,
421
diags->query_length);
424
if (diags->weighted_residue_freqs) {
425
_PSIDeallocateMatrix((void**) diags->weighted_residue_freqs,
426
diags->query_length);
429
if (diags->frequency_ratios) {
430
_PSIDeallocateMatrix((void**) diags->frequency_ratios,
431
diags->query_length);
434
if (diags->gapless_column_weights) {
435
sfree(diags->gapless_column_weights);