1
/* $Id: blast_inline.h,v 1.4 2004/09/01 13:19:43 madden Exp $
2
* ===========================================================================
5
* National Center for Biotechnology Information
7
* This software/database is a "United States Government Work" under the
8
* terms of the United States Copyright Act. It was written as part of
9
* the author's offical duties as a United States Government employee and
10
* thus cannot be copyrighted. This software/database is freely available
11
* to the public for use. The National Library of Medicine and the U.S.
12
* Government have not placed any restriction on its use or reproduction.
14
* Although all reasonable efforts have been taken to ensure the accuracy
15
* and reliability of the software and data, the NLM and the U.S.
16
* Government do not and cannot warrant the performance or results that
17
* may be obtained by using this software or data. The NLM and the U.S.
18
* Government disclaim all warranties, express or implied, including
19
* warranties of performance, merchantability or fitness for any particular
22
* Please cite the author in any work or product based on this material.
24
* ===========================================================================
28
/** @file blast_inline.h
29
* @todo FIXME needs file description
32
#include <algo/blast/core/blast_util.h>
33
#include <algo/blast/core/blast_lookup.h>
34
#include <algo/blast/core/mb_lookup.h>
36
/** Given a word packed into an integer, compute a discontiguous word lookup
38
* @param subject Pointer to the next byte of the sequence after the end of
39
* the word (needed when word template is longer than 16 bases) [in]
40
* @param word A piece of the sequence packed into an integer [in]
41
* @param template_type What type of discontiguous word template to use [in]
42
* @return The lookup table index of the discontiguous word [out]
44
static NCBI_INLINE Int4 ComputeDiscontiguousIndex(Uint1* subject, Int4 word,
50
switch (template_type) {
52
index = GET_WORD_INDEX_11_16(word);
55
index = GET_WORD_INDEX_12_16(word);
58
index = GET_WORD_INDEX_11_16_OPT(word);
61
index = GET_WORD_INDEX_12_16_OPT(word);
64
extra_code = (Int4) GET_EXTRA_CODE_PACKED_4_18(subject);
65
index = (GET_WORD_INDEX_11_18(word) | extra_code);
68
extra_code = (Int4) GET_EXTRA_CODE_PACKED_4_18(subject);
69
index = (GET_WORD_INDEX_12_18(word) | extra_code);
72
extra_code = (Int4) GET_EXTRA_CODE_PACKED_4_18_OPT(subject);
73
index = (GET_WORD_INDEX_11_18_OPT(word) | extra_code);
76
extra_code = (Int4) GET_EXTRA_CODE_PACKED_4_18_OPT(subject);
77
index = (GET_WORD_INDEX_12_18_OPT(word) | extra_code);
80
extra_code = (Int4) GET_EXTRA_CODE_PACKED_4_21(subject);
81
index = (GET_WORD_INDEX_11_21(word) | extra_code);
84
extra_code = (Int4) GET_EXTRA_CODE_PACKED_4_21(subject);
85
index = (GET_WORD_INDEX_12_21(word) | extra_code);
88
extra_code = (Int4) GET_EXTRA_CODE_PACKED_4_21_OPT(subject);
89
index = (GET_WORD_INDEX_11_21_OPT(word) | extra_code);
92
extra_code = (Int4) GET_EXTRA_CODE_PACKED_4_21_OPT(subject);
93
index = (GET_WORD_INDEX_12_21_OPT(word) | extra_code);
100
#ifdef USE_HASH_TABLE
101
hash_buf = (Uint1*)&index;
102
CRC32(crc, hash_buf);
103
index = (crc>>hash_shift) & hash_mask;
109
/** Compute the lookup table index for the first word template, given a word
110
* position, template type and previous value of the word, in case of
111
* one-base (2 bit) database scanning.
112
* @param word_start Pointer to the start of a word in the sequence [in]
113
* @param word The word packed into an integer value [in]
114
* @param sequence_bit By how many bits the real word start is shifted within
115
* a compressed sequence byte [in]
116
* @param template_type What discontiguous word template to use for index
118
* @return The lookup index for the discontiguous word.
120
static NCBI_INLINE Int4 ComputeDiscontiguousIndex_1b(const Uint1* word_start,
121
Int4 word, Uint1 sequence_bit, Uint1 template_type)
124
Uint1* subject = (Uint1 *) word_start;
126
Int4 extra_code, tmpval;
128
/* Prepare auxiliary variables for extra code calculation */
131
/* The bits in an integer byte are counted in a reverse order than in a
133
bit = 6 - sequence_bit;
135
switch (template_type) {
137
index = GET_WORD_INDEX_11_16(word);
140
index = GET_WORD_INDEX_12_16(word);
142
case TEMPL_11_16_OPT:
143
index = GET_WORD_INDEX_11_16_OPT(word);
145
case TEMPL_12_16_OPT:
146
index = GET_WORD_INDEX_12_16_OPT(word);
149
GET_EXTRA_CODE_PACKED_18(subject, bit, tmpval, extra_code);
150
index = (GET_WORD_INDEX_11_18(word) | extra_code);
153
GET_EXTRA_CODE_PACKED_18(subject, bit, tmpval, extra_code);
154
index = (GET_WORD_INDEX_12_18(word) | extra_code);
156
case TEMPL_11_18_OPT:
157
GET_EXTRA_CODE_PACKED_18_OPT(subject, bit, tmpval, extra_code);
158
index = (GET_WORD_INDEX_11_18_OPT(word) | extra_code);
160
case TEMPL_12_18_OPT:
161
GET_EXTRA_CODE_PACKED_18_OPT(subject, bit, tmpval, extra_code);
162
index = (GET_WORD_INDEX_12_18_OPT(word) | extra_code);
165
GET_EXTRA_CODE_PACKED_21(subject, bit, tmpval, extra_code);
166
index = (GET_WORD_INDEX_11_21(word) | extra_code);
169
GET_EXTRA_CODE_PACKED_21(subject, bit, tmpval, extra_code);
170
index = (GET_WORD_INDEX_12_21(word) | extra_code);
172
case TEMPL_11_21_OPT:
173
GET_EXTRA_CODE_PACKED_21_OPT(subject, bit, tmpval, extra_code);
174
index = (GET_WORD_INDEX_11_21_OPT(word) | extra_code);
176
case TEMPL_12_21_OPT:
177
GET_EXTRA_CODE_PACKED_21_OPT(subject, bit, tmpval, extra_code);
178
index = (GET_WORD_INDEX_12_21_OPT(word) | extra_code);
185
#ifdef USE_HASH_TABLE
186
hash_buf = (Uint1*)&index;
187
CRC32(crc, hash_buf);
188
index = (crc>>hash_shift) & hash_mask;
194
static NCBI_INLINE void _ComputeIndex(Int4 wordsize,
200
static NCBI_INLINE void _ComputeIndexIncremental(Int4 wordsize,
206
/** Given a word, compute its index value from scratch.
208
* @param wordsize length of the word, in residues [in]
209
* @param charsize length of one residue, in bits [in]
210
* @param mask value used to mask the index so that only the bottom wordsize * charsize bits remain [in]
211
* @param word pointer to the beginning of the word [in]
212
* @param index the computed index value [out]
215
static NCBI_INLINE void _ComputeIndex(Int4 wordsize,
225
for(i=0;i<wordsize;i++)
227
*index = ((*index << charsize) | word[i]) & mask;
233
/** Given a word, compute its index value, reusing a previously
234
* computed index value.
236
* @param wordsize length of the word - 1, in residues [in]
237
* @param charsize length of one residue, in bits [in]
238
* @param mask value used to mask the index so that only the bottom wordsize * charsize bits remain [in]
239
* @param word pointer to the beginning of the word [in]
240
* @param index the computed index value [in/out]
243
static NCBI_INLINE void _ComputeIndexIncremental(Int4 wordsize,
249
*index = ((*index << charsize) | word[wordsize - 1]) & mask;
254
/* Given a starting position of a word in a compressed nucleotide sequence,
255
* compute this word's lookup table index
257
static NCBI_INLINE Uint1* BlastNaLookupInitIndex(Int4 length,
258
const Uint1* word, Int4* index)
263
for (i = 0; i < length; ++i)
264
*index = ((*index)<<FULL_BYTE_SHIFT) | word[i];
265
return (Uint1 *) (word + length);
268
/* Recompute the word index given its previous value and the new location
269
* of the last byte of the word
271
static NCBI_INLINE Int4 BlastNaLookupComputeIndex(Int4 scan_shift, Int4 mask,
272
const Uint1* word, Int4 index)
274
return (((index)<<scan_shift) & mask) | *(word);
278
/* Given a word computed from full bytes of a compressed sequence,
279
* shift it by 0-3 bases
281
static NCBI_INLINE Int4 BlastNaLookupAdjustIndex(Uint1* s, Int4 index,
282
Int4 mask, Uint1 bit)
284
return (((index)<<bit) & mask) | ((*s)>>(FULL_BYTE_SHIFT-bit));
289
#define BLAST2NA_MASK 0xfc
291
/** Compute the lookup index for a word in an uncompressed sequence, without
292
* using any previous index information.
293
* @param lookup Pointer to the traditional BLASTn lookup table structure [in]
294
* @param word Pointer to the start of the word [in]
295
* @param index The lookup index [out]
297
static NCBI_INLINE Int2
298
Na_LookupComputeIndex(BlastLookupTable* lookup, Uint1* word, Int4* index)
301
Int4 wordsize = lookup->reduced_wordsize*COMPRESSION_RATIO; /* i.e. 8 or 4 */
304
for (i = 0; i < wordsize; ++i) {
305
if ((word[i] & BLAST2NA_MASK) != 0) {
309
*index = (((*index)<<lookup->charsize) & lookup->mask) | word[i];
315
/** Pack 4 sequence bytes into a one byte integer, assuming sequence contains
318
#define PACK_WORD(q) ((q[0]<<6) + (q[1]<<4) + (q[2]<<2) + q[3])
320
/** Compare a given number of bytes of an compressed subject sequence with
321
* the non-compressed query sequence.
322
* @param q Pointer to the first byte to be compared in the query sequence [in]
323
* @param s Pointer to the first byte to be compared in the subject
325
* @param extra_bytes Number of compressed bytes to compare [in]
326
* @return TRUE if sequences are identical, FALSE if mismatch is found.
328
static NCBI_INLINE Boolean BlastNaCompareExtraBytes(Uint1* q, Uint1* s,
333
for (index = 0; index < extra_bytes; ++index) {
334
if (*s++ != PACK_WORD(q))
336
q += COMPRESSION_RATIO;
341
/** Perform mini extension (up to max_left <= 4 bases) to the left;
342
* @param q Pointer to the query base right after the ones to be extended [in]
343
* @param s Pointer to a byte in the compressed subject sequence that is to be
344
* tested for extension [in]
345
* @param max_left Maximal number of bits to compare [in]
346
* @return Number of matched bases
348
static NCBI_INLINE Uint1
349
BlastNaMiniExtendLeft(Uint1* q, const Uint1* s, Uint1 max_left)
353
for (left = 0; left < max_left; ++left) {
354
if (NCBI2NA_UNPACK_BASE(*s, left) != *--q) {
361
/** Perform mini extension (up to max_right <= 4 bases) to the right;
362
* @param q Pointer to the start of the extension in the query [in]
363
* @param s Pointer to a byte in the compressed subject sequence that is to be
364
* tested for extension [in]
365
* @param max_right Maximal number of bits to compare [in]
366
* @return Number of matched bases
368
static NCBI_INLINE Uint1
369
BlastNaMiniExtendRight(Uint1* q, const Uint1* s, Uint1 max_right)
374
for (right = 0; right < max_right; ++right, --index) {
375
if (NCBI2NA_UNPACK_BASE(*s, index) != *q++) {
382
/** Returns the index in the MaskLoc given a context number for the query.
383
* If the query is nucleotide
385
* @param is_na the query is nucleotide
386
* @param context offset in the QueryInfo array
387
* @return index in the maskloc
389
static NCBI_INLINE Int2 BlastGetMaskLocIndexFromContext(Boolean is_na, Int2 context)
391
return (is_na ? context / 2 : context);
394
/** Determines whether this is a nucleotide query and whether this a minus strand or not
396
* @param is_na the query is nucleotide
397
* @param context offset in the QueryInfo array
398
* @return TRUE if this is minus strand
400
static NCBI_INLINE Boolean BlastIsReverseStrand(Boolean is_na, Int2 context)
402
return (is_na && ((context & 1) != 0));