1
/* $Id: blast_query_info.c,v 1.3 2006/04/25 19:06:15 camacho Exp $
2
* ===========================================================================
5
* National Center for Biotechnology Information
7
* This software/database is a "United States Government Work" under the
8
* terms of the United States Copyright Act. It was written as part of
9
* the author's official duties as a United States Government employee and
10
* thus cannot be copyrighted. This software/database is freely available
11
* to the public for use. The National Library of Medicine and the U.S.
12
* Government have not placed any restriction on its use or reproduction.
14
* Although all reasonable efforts have been taken to ensure the accuracy
15
* and reliability of the software and data, the NLM and the U.S.
16
* Government do not and cannot warrant the performance or results that
17
* may be obtained by using this software or data. The NLM and the U.S.
18
* Government disclaim all warranties, express or implied, including
19
* warranties of performance, merchantability or fitness for any particular
22
* Please cite the author in any work or product based on this material.
24
* ===========================================================================
26
* Author: Christiam Camacho
30
/** @file blast_query_info.c
31
* Functions to manipulate the BlastQueryInfo structure
35
#ifndef SKIP_DOXYGEN_PROCESSING
36
static char const rcsid[] =
37
"$Id: blast_query_info.c,v 1.3 2006/04/25 19:06:15 camacho Exp $";
38
#endif /* SKIP_DOXYGEN_PROCESSING */
40
#include <algo/blast/core/blast_query_info.h>
41
#include <algo/blast/core/blast_util.h>
42
#include <algo/blast/core/pattern.h>
45
Blast_GetQueryIndexFromContext(Int4 context, EBlastProgramType program)
47
Int4 index = -1; /* -1 is used to indicate error */
49
case eBlastTypeBlastn:
50
index = context/NUM_STRANDS;
52
case eBlastTypeBlastp:
53
case eBlastTypeTblastn:
54
case eBlastTypeRpsBlast:
55
case eBlastTypePsiBlast:
56
case eBlastTypeRpsTblastn:
57
case eBlastTypePhiBlastn:
58
case eBlastTypePhiBlastp:
61
case eBlastTypeBlastx:
62
case eBlastTypeTblastx:
63
index = context/NUM_FRAMES;
71
BlastQueryInfo* BlastQueryInfoNew(EBlastProgramType program, int num_queries)
73
const unsigned int kNumContexts = BLAST_GetNumberOfContexts(program);
74
BlastQueryInfo* retval = NULL;
76
if (num_queries <= 0) {
79
ASSERT(kNumContexts != 0);
81
retval = (BlastQueryInfo*) calloc(1, sizeof(BlastQueryInfo));
83
return BlastQueryInfoFree(retval);
86
retval->num_queries = num_queries;
88
retval->first_context = 0;
89
retval->last_context = retval->num_queries * kNumContexts - 1;
91
retval->contexts = (BlastContextInfo*) calloc(retval->last_context + 1,
92
sizeof(BlastContextInfo));
94
if ( !retval->contexts ) {
95
return BlastQueryInfoFree(retval);
98
for (i = 0; i < retval->last_context + 1; i++) {
99
retval->contexts[i].query_index =
100
Blast_GetQueryIndexFromContext(i, program);
101
ASSERT(retval->contexts[i].query_index != -1);
103
retval->contexts[i].frame = BLAST_ContextToFrame(program, i);
104
ASSERT(retval->contexts[i].frame != INT1_MAX);
106
retval->contexts[i].is_valid = TRUE;
113
BlastQueryInfo* BlastQueryInfoFree(BlastQueryInfo* query_info)
116
sfree(query_info->contexts);
117
query_info->pattern_info =
118
SPHIQueryInfoFree(query_info->pattern_info);
124
BlastQueryInfo* BlastQueryInfoDup(BlastQueryInfo* query_info)
126
BlastQueryInfo* retval = BlastMemDup(query_info, sizeof(BlastQueryInfo));
127
Int4 num_contexts = query_info->last_context + 1;
130
BlastMemDup(query_info->contexts, num_contexts * sizeof(BlastContextInfo));
132
if (query_info->pattern_info) {
133
retval->pattern_info =
134
SPHIQueryInfoCopy(query_info->pattern_info);
140
/** Calculates length of the DNA query from the BlastQueryInfo structure that
141
* contains context information for translated frames for a set of queries.
142
* @param query_info Query information containing data for all contexts [in]
143
* @param query_index Which query to find DNA length for?
144
* @return DNA length of the query, calculated as sum of 3 protein frame lengths,
145
* plus 2, because 2 last nucleotide residues do not have a
146
* corresponding codon.
149
s_GetTranslatedQueryDNALength(const BlastQueryInfo* query_info, Int4 query_index)
151
Int4 start_context = NUM_FRAMES*query_index;
155
/* Make sure that query index is within appropriate range, and that this is
156
really a translated search */
157
ASSERT(query_index < query_info->num_queries);
158
ASSERT(start_context < query_info->last_context);
160
/* If only reverse strand is searched, then forward strand contexts don't
161
have lengths information */
162
if (query_info->contexts[start_context].query_length == 0)
165
for (index = start_context; index < start_context + 3; ++index)
166
dna_length += query_info->contexts[index].query_length;
171
Int4 BlastQueryInfoGetQueryLength(const BlastQueryInfo* qinfo,
172
EBlastProgramType program,
175
const Uint4 kNumContexts = BLAST_GetNumberOfContexts(program);
176
ASSERT(query_index < qinfo->num_queries);
178
if (Blast_QueryIsTranslated(program)) {
179
return s_GetTranslatedQueryDNALength(qinfo, query_index);
180
} else if (program == eBlastTypeBlastn) {
181
Int4 retval = qinfo->contexts[query_index*kNumContexts].query_length;
183
retval = qinfo->contexts[query_index*kNumContexts+1].query_length;
187
return qinfo->contexts[query_index*kNumContexts].query_length;
191
/* FIXME: should the EBlastProgramType be added as a member of the
192
* BlastQueryInfo structure? Without it, there's many operations that can't be
193
* done, so it doesn't make sense to have them separate... */
195
BlastQueryInfoGetEffSearchSpace(const BlastQueryInfo* qinfo,
196
EBlastProgramType program,
201
const Uint4 kNumContexts = BLAST_GetNumberOfContexts(program);
202
ASSERT(query_index < qinfo->num_queries);
204
for (i = query_index*kNumContexts; i < (query_index+1)*kNumContexts; i++) {
205
if ( (retval = qinfo->contexts[i].eff_searchsp) != 0) {
213
BlastQueryInfoSetEffSearchSpace(BlastQueryInfo* qinfo,
214
EBlastProgramType program,
219
const Uint4 kNumContexts = BLAST_GetNumberOfContexts(program);
220
ASSERT(query_index < qinfo->num_queries);
222
for (i = query_index*kNumContexts; i < (query_index+1)*kNumContexts; i++) {
223
qinfo->contexts[i].eff_searchsp = eff_searchsp;
227
Int4 BSearchContextInfo(Int4 n, const BlastQueryInfo * A)
229
Int4 m=0, b=0, e=0, size=0;
231
size = A->last_context+1;
237
if (A->contexts[m].query_offset > n)
246
QueryInfo_GetSeqBufLen(const BlastQueryInfo* qinfo)
248
BlastContextInfo * cinfo = & qinfo->contexts[qinfo->last_context];
249
return cinfo->query_offset + cinfo->query_length + (cinfo->query_length ? 2 : 1);
253
ContextOffsetsToOffsetArray(BlastQueryInfo* info)
255
/* The Many Values of 'Length'
257
* 1. info->last_context: the index of the last query offset.
259
* 2. count: the number of query offsets.
261
* 3. count + 1: the size of the output array (has an 'extra'
262
* member so as to communicate the last sequence length).
264
* 4. sz: the size of the context object array
267
Uint4 count = (info->last_context + 1);
268
Uint4 sz = sizeof(Int4) * (count+1);
273
ASSERT(info->contexts);
276
memset(result, 0, sz);
278
for(frame = 0; frame < count; frame++) {
279
result[frame] = info->contexts[frame].query_offset;
282
/* One more entry, provides length info for last element. */
284
result[count] = info->contexts[count-1].query_offset;
286
if (info->contexts[count-1].query_length) {
287
result[count] += info->contexts[count-1].query_length + 1;
294
OffsetArrayToContextOffsets(BlastQueryInfo * info,
296
EBlastProgramType prog)
298
Uint4 count = (info->last_context + 1);
304
if (! info->contexts) {
305
info->contexts = calloc(count, sizeof(BlastContextInfo));
308
for(i = 0; i < count; i++) {
311
info->contexts[i].query_offset = new_offsets[i];
313
distance = new_offsets[i+1] - new_offsets[i];
314
info->contexts[i].query_length = distance ? distance-1 : 0;
316
/* Set the frame and query index */
318
info->contexts[i].frame =
319
BLAST_ContextToFrame(prog, i);
321
info->contexts[i].query_index =
322
Blast_GetQueryIndexFromContext(i, prog);
327
Blast_GetOneQueryStructs(BlastQueryInfo** one_query_info_ptr,
328
BLAST_SequenceBlk** one_query_ptr,
329
const BlastQueryInfo* query_info,
330
BLAST_SequenceBlk* query, Int4 query_index)
336
BlastQueryInfo* one_query_info = NULL;
337
BLAST_SequenceBlk* one_query = NULL;
339
if (!one_query_info_ptr || !one_query_ptr || !query_info || !query ||
340
query_index >= query_info->num_queries)
343
num_frames = (query_info->last_context / query_info->num_queries) + 1;
344
first_context = query_index*num_frames;
345
query_offset = query_info->contexts[first_context].query_offset;
347
one_query_info = *one_query_info_ptr;
348
/* If this hasn't been already done, allocate new query information
350
if (!one_query_info) {
351
one_query_info = (BlastQueryInfo*) calloc(1, sizeof(BlastQueryInfo));
352
*one_query_info_ptr = one_query_info;
353
one_query_info->contexts = (BlastContextInfo*) calloc(num_frames, sizeof(BlastContextInfo));
355
one_query = *one_query_ptr;
356
/* If this hasn't been already done, allocate new sequence block. */
358
one_query = (BLAST_SequenceBlk*) calloc(1, sizeof(BLAST_SequenceBlk));
359
*one_query_ptr = one_query;
361
if (!one_query_info || !one_query)
364
one_query_info->num_queries = 1;
365
one_query_info->last_context = num_frames - 1;
367
memcpy(one_query_info->contexts,
368
&query_info->contexts[first_context],
369
num_frames * sizeof(BlastContextInfo));
371
/* Make context offsets relative to this query. */
372
for (index = 0; index < num_frames; ++index) {
373
one_query_info->contexts[index].query_offset -= query_offset;
376
/* Fill the sequence block information for this one query. */
377
memset(one_query, 0, sizeof(BLAST_SequenceBlk));
378
one_query->sequence = &query->sequence[query_offset];
380
one_query_info->contexts[num_frames-1].query_offset +
381
one_query_info->contexts[num_frames-1].query_length;
382
one_query->sequence_allocated = FALSE;
383
one_query->oid = query_index;