4
* Created by Mengyao Zhao on 6/22/10.
5
* Copyright 2010 Boston College. All rights reserved.
7
* Last revision by Mengyao Zhao on 07/31/12.
17
#include <emmintrin.h>
19
/*! @typedef structure of the query profile */
21
typedef struct _profile s_profile;	// forward declaration only; the body of struct _profile is not defined in this part of the header
23
/*! @typedef structure of the alignment result
24
@field score1 the best alignment score
25
@field score2 sub-optimal alignment score
26
@field ref_begin1 0-based best alignment beginning position on reference; ref_begin1 = -1 when the best alignment beginning
27
position is not available
28
@field ref_end1 0-based best alignment ending position on reference
29
@field read_begin1 0-based best alignment beginning position on read; read_begin1 = -1 when the best alignment beginning
30
position is not available
31
@field read_end1 0-based best alignment ending position on read
32
@field read_end2 0-based sub-optimal alignment ending position on read
33
@field cigar best alignment cigar; stored the same as that in BAM format, high 28 bits: length, low 4 bits: M/I/D (0/1/2);
34
cigar = 0 when the best alignment path is not available
35
@field cigarLen length of the cigar string; cigarLen = 0 when the best alignment path is not available
53
/*! @function Create the query profile using the query sequence.
54
@param read pointer to the query sequence; the query sequence needs to be numbers
55
@param readLen length of the query sequence
56
@param mat pointer to the substitution matrix; mat needs to be corresponding to the read sequence
57
@param n the square root of the number of elements in mat (mat has n*n elements)
58
@param score_size estimated Smith-Waterman score; if your estimated best alignment score is surely < 255 please set 0; if
59
your estimated best alignment score >= 255, please set 1; if you don't know, please set 2
60
@return pointer to the query profile structure
61
@note example for parameter read and mat:
62
If the query sequence is: ACGTATC, the sequence that read points to can be: 1234142
63
Then if the score for a match is 2 and the score for a mismatch is -2, the substitution matrix of parameter mat will be:
69
mat is the pointer to the array {2, -2, -2, -2, -2, 2, -2, -2, -2, -2, 2, -2, -2, -2, -2, 2}
71
s_profile* ssw_init (const int8_t* read,		// query sequence, encoded as numbers
					 const int32_t readLen,		// length of the query sequence
					 const int8_t* mat,			// substitution matrix with n*n elements, matching the read encoding
					 const int32_t n,			// square root of the number of elements in mat
					 const int8_t score_size);	// 0: best score surely < 255; 1: best score >= 255; 2: unknown
73
/*! @function Release the memory allocated by function ssw_init.
74
@param p pointer to the query profile structure
76
void init_destroy (s_profile* p);	// p: query profile whose memory was allocated by ssw_init
78
// @function ssw alignment.
79
/*! @function Do Striped Smith-Waterman alignment.
80
@param prof pointer to the query profile structure
81
@param ref pointer to the target sequence; the target sequence needs to be numbers and corresponding to the mat parameter of
83
@param refLen length of the target sequence
84
@param weight_gapO the absolute value of gap open penalty
85
@param weight_gapE the absolute value of gap extension penalty
86
@param flag bitwise FLAG; (from high to low) bit 5: when set to 1, function ssw_align will return the best alignment
87
beginning position; bit 6: when set to 1, if (ref_end1 - ref_begin1 < filterd && read_end1 - read_begin1
88
< filterd), (whatever bit 5 is set to) the function will return the best alignment beginning position and
89
cigar; bit 7: when set to 1, if the best alignment score >= filters, (whatever bit 5 is set to) the function
90
will return the best alignment beginning position and cigar; bit 8: when set to 1, (whatever bit 5, 6 or 7 is
91
set to) the function will always return the best alignment beginning position and cigar
92
@param filters score filter: when bit 7 of flag is set to 1 and bit 8 is set to 0, filters will be used (Please check the
93
description of the flag parameter for detailed usage.)
94
@param filterd distance filter: when bit 6 of flag is set to 1 and bit 8 is set to 0, filterd will be used (Please check
95
the description of the flag parameter for detailed usage.)
96
@param maskLen The distance between the optimal and suboptimal alignment ending positions is >= maskLen. We suggest using
97
readLen/2, if you don't have special concerns. Note: maskLen has to be >= 15, otherwise this function will NOT
98
return the suboptimal alignment information. Detailed description of maskLen: After locating the optimal
99
alignment ending position, the suboptimal alignment score can be heuristically found by checking the second
100
largest score in the array that contains the maximal score of each column of the SW matrix. In order to avoid
101
picking the scores that belong to the alignments sharing the partial best alignment, SSW C library masks the
102
reference loci nearby (mask length = maskLen) the best alignment ending position and locates the second largest
103
score from the unmasked elements.
104
@return pointer to the alignment result structure
105
@note Whatever the parameter flag is set to, this function will at least return the optimal and sub-optimal alignment score,
106
and the optimal alignment ending positions on target and query sequences. If both bit 6 and 7 of the flag are set
107
while bit 8 is not, the function will return cigar only when both criteria are fulfilled. All returned positions are
110
s_align* ssw_align (const s_profile* prof,	// query profile structure
113
const uint8_t weight_gapO,	// absolute value of the gap open penalty
114
const uint8_t weight_gapE,	// absolute value of the gap extension penalty
116
const uint16_t filters,	// score filter; used when bit 7 of flag is 1 and bit 8 is 0
117
const int32_t filterd,	// distance filter; used when bit 6 of flag is 1 and bit 8 is 0
118
const int32_t maskLen);	// mask length around the best alignment end; must be >= 15 for sub-optimal results to be returned
120
/*! @function Release the memory allocated by function ssw_align.
121
@param a pointer to the alignment result structure
123
void align_destroy (s_align* a);	// a: alignment result whose memory was allocated by ssw_align
127
#endif // __cplusplus