1
#ifndef ALGO_BLAST_CORE___BLAST_PSI__H
2
#define ALGO_BLAST_CORE___BLAST_PSI__H
4
/* $Id: blast_psi.h,v 1.10 2004/09/17 13:08:29 camacho Exp $
5
* ===========================================================================
8
* National Center for Biotechnology Information
10
* This software/database is a "United States Government Work" under the
11
* terms of the United States Copyright Act. It was written as part of
12
* the author's official duties as a United States Government employee and
13
* thus cannot be copyrighted. This software/database is freely available
14
* to the public for use. The National Library of Medicine and the U.S.
15
* Government have not placed any restriction on its use or reproduction.
17
* Although all reasonable efforts have been taken to ensure the accuracy
18
* and reliability of the software and data, the NLM and the U.S.
19
* Government do not and cannot warrant the performance or results that
20
* may be obtained by using this software or data. The NLM and the U.S.
21
* Government disclaim all warranties, express or implied, including
22
* warranties of performance, merchantability or fitness for any particular
* purpose.
25
* Please cite the author in any work or product based on this material.
27
* ===========================================================================
29
* Author: Christiam Camacho
34
* High level definitions and declarations for the PSSM engine of PSI-BLAST.
*/
37
#include <algo/blast/core/ncbi_std.h>
38
#include <algo/blast/core/blast_options.h>
44
/** Structure to describe the characteristics of a position in the multiple
45
* sequence alignment data structure
47
typedef struct PSIMsaCell {
48
Uint1 letter; /**< Preferred letter at this position */
49
Boolean is_aligned; /**< Is this letter part of the alignment? */
52
/** Structure representing the dimensions of the multiple sequence alignment
54
typedef struct PSIMsaDimensions {
55
Uint4 query_length; /**< Length of the query */
56
Uint4 num_seqs; /**< Number of distinct sequences aligned with the
57
query (does not include the query) */
60
/** Multiple sequence alignment (msa) data structure containing the raw data
61
* needed by the PSSM engine to create a PSSM. By convention, the first row of
62
* the data field contains the query sequence */
63
typedef struct PSIMsa {
64
PSIMsaDimensions* dimensions; /**< dimensions of the msa */
65
PSIMsaCell** data; /**< actual data, dimensions are
66
(dimensions->num_seqs+1) by
67
(dimensions->query_length) */
70
/** Allocates and initializes the multiple sequence alignment data structure
71
* for use as input to the PSSM engine.
72
* @param dimensions dimensions of multiple sequence alignment data structure
74
* @return allocated PSIMsa structure or NULL if out of memory.
77
PSIMsaNew(const PSIMsaDimensions* dimensions);
79
/** Deallocates the PSIMsa structure
80
* @param msa multiple sequence alignment structure to deallocate [in]
84
PSIMsaFree(PSIMsa* msa);
86
/** This is the main return value from the PSSM engine */
87
typedef struct PSIMatrix {
88
Uint4 ncols; /**< Number of columns in PSSM (query_length) */
89
Uint4 nrows; /**< Number of rows in PSSM (alphabet_size) */
90
int** pssm; /**< Position-specific score matrix */
91
double lambda; /**< Lambda Karlin-Altschul parameter */
92
double kappa; /**< Kappa Karlin-Altschul parameter */
93
double h; /**< H Karlin-Altschul parameter */
96
/** Allocates a new PSIMatrix structure
97
* @param query_length number of columns allocated for the PSSM [in]
98
* @param alphabet_size number of rows allocated for the PSSM [in]
99
* @return pointer to allocated PSIMatrix structure or NULL if out of memory
102
PSIMatrixNew(Uint4 query_length, Uint4 alphabet_size);
104
/** Deallocates the PSIMatrix structure passed in.
105
* @param matrix structure to deallocate [in]
109
PSIMatrixFree(PSIMatrix* matrix);
111
/** Structure to allow requesting various diagnostics data to be collected by
113
typedef struct PSIDiagnosticsRequest {
114
Boolean information_content; /**< request information content */
115
Boolean residue_frequencies; /**< request observed residue
117
Boolean weighted_residue_frequencies; /**< request observed weighted
118
residue frequencies */
119
Boolean frequency_ratios; /**< request frequency ratios */
120
Boolean gapless_column_weights; /**< request gapless column weights
122
} PSIDiagnosticsRequest;
124
/** This structure contains the diagnostics information requested using the
125
* PSIDiagnosticsRequest structure */
126
typedef struct PSIDiagnosticsResponse {
127
double* information_content; /**< position information content
128
(query_length elements)*/
129
Uint4** residue_freqs; /**< observed residue frequencies
130
per position of the PSSM
131
(Dimensions are query_length by
133
double** weighted_residue_freqs; /**< Weighted observed residue
134
frequencies per position of the
135
PSSM. (Dimensions are query_length
137
double** frequency_ratios; /**< PSSM's frequency ratios
138
(Dimensions are query_length by
140
double* gapless_column_weights; /**< Weights for columns without
141
gaps (query_length elements) */
142
Uint4 query_length; /**< Specifies the number of
143
positions in the PSSM */
144
Uint4 alphabet_size; /**< Specifies length of alphabet */
145
} PSIDiagnosticsResponse;
147
/** Allocates a new PSI-BLAST diagnostics structure based on which fields of
148
* the PSIDiagnosticsRequest structure are TRUE. Note: this is declared
149
* here for consistency - this does not need to be called by client code of
150
* this API, it is called in the PSICreatePssm* functions to allocate the
151
* diagnostics response structure.
152
* @param query_length length of the query sequence [in]
153
* @param alphabet_size length of the alphabet [in]
154
* @param request diagnostics to retrieve from PSSM engine [in]
155
* @return pointer to allocated PSIDiagnosticsResponse or NULL if dimensions or
158
PSIDiagnosticsResponse*
159
PSIDiagnosticsResponseNew(Uint4 query_length, Uint4 alphabet_size,
160
const PSIDiagnosticsRequest* request);
162
/** Deallocates the PSIDiagnosticsResponse structure passed in.
163
* @param diags structure to deallocate [in]
166
PSIDiagnosticsResponse*
167
PSIDiagnosticsResponseFree(PSIDiagnosticsResponse* diags);
169
/****************************************************************************/
171
/** Main entry point to core PSSM engine to calculate the PSSM.
172
* @param msap multiple sequence alignment data structure [in]
173
* @param options options to the PSSM engine [in]
174
* @param sbp BLAST score block structure [in|out]
175
* @param pssm PSSM and statistical information (the latter is also returned
176
* in the sbp->kbp_gap_psi[0])
177
* @return 0 on success, else failure (FIXME)
180
PSICreatePssm(const PSIMsa* msap,
181
const PSIBlastOptions* options,
185
/** Main entry point to the core PSSM engine which allows diagnostics to be requested
187
* @param msap multiple sequence alignment data structure [in]
188
* @param options options to the PSSM engine [in]
189
* @param sbp BLAST score block structure [in|out]
190
* @param request diagnostics information request [in]
191
* @param pssm PSSM and statistical information (the latter is also returned
192
* in the sbp->kbp_gap_psi[0]) [out]
193
* @param diagnostics diagnostics information response, expects a pointer to an
194
* uninitialized structure which will be populated with data requested in
196
* @return 0 on success, else failure (FIXME)
199
PSICreatePssmWithDiagnostics(const PSIMsa* msap,
200
const PSIBlastOptions* options,
202
const PSIDiagnosticsRequest* request,
204
PSIDiagnosticsResponse** diagnostics);
209
#endif /* !ALGO_BLAST_CORE___BLAST_PSI__H */