1
/* $Id: blast_filter.h,v 1.23 2004/09/13 12:40:35 madden Exp $
2
* ===========================================================================
5
* National Center for Biotechnology Information
7
* This software/database is a "United States Government Work" under the
8
* terms of the United States Copyright Act. It was written as part of
9
* the author's official duties as a United States Government employee and
10
* thus cannot be copyrighted. This software/database is freely available
11
* to the public for use. The National Library of Medicine and the U.S.
12
* Government have not placed any restriction on its use or reproduction.
14
* Although all reasonable efforts have been taken to ensure the accuracy
15
* and reliability of the software and data, the NLM and the U.S.
16
* Government do not and cannot warrant the performance or results that
17
* may be obtained by using this software or data. The NLM and the U.S.
18
* Government disclaim all warranties, express or implied, including
19
* warranties of performance, merchantability or fitness for any particular
22
* Please cite the author in any work or product based on this material.
24
* ===========================================================================
26
* Author: Ilya Dondoshansky
30
/** @file blast_filter.h
31
* BLAST filtering functions. @todo FIXME: contains more than filtering
32
* functions, combine with blast_dust.h?
35
#ifndef __BLAST_FILTER__
36
#define __BLAST_FILTER__
38
#include <algo/blast/core/blast_def.h>
39
#include <algo/blast/core/blast_message.h>
45
/** Create and initialize a new sequence interval.
46
* @param head existing BlastSeqLoc to add onto, if *head
47
* is NULL then it will be set to new BlastSeqLoc, may be NULL [in|out]
48
* @param from Start of the interval [in]
49
* @param to End of the interval [in]
50
* @return Pointer to the allocated BlastSeqLoc structure.
53
BlastSeqLoc* BlastSeqLocNew(BlastSeqLoc** head, Int4 from, Int4 to);
55
/** Deallocate all BlastSeqLoc objects in a chain.
56
* @param loc object to be freed [in]
57
* @return Always NULL.
60
BlastSeqLoc* BlastSeqLocFree(BlastSeqLoc* loc);
62
/** Deallocate memory for a BlastMaskLoc structure
63
* as well as the BlastSeqLoc's pointed to.
64
* @param mask_loc the object to be deleted [in]
65
* @return NULL pointer
68
BlastMaskLoc* BlastMaskLocFree(BlastMaskLoc* mask_loc);
70
/** Allocate memory for a BlastMaskLoc, also allocates array for BlastSeqLoc* of length total.
71
* @param total number of BlastSeqLoc* entries to allocate in the array [in]
72
* @return Pointer to the allocated BlastMaskLoc structure.
75
BlastMaskLoc* BlastMaskLocNew(Int4 total);
77
/** Go through all mask locations in one sequence,
78
* combine any that overlap. Deallocate the memory for the locations that
79
* were on the list, produce a new (merged) list of locations.
80
* @param mask_loc The list of masks to be merged [in]
81
* @param mask_loc_out The new (merged) list of masks. [out]
82
* @param link_value Largest gap size between locations for which they
83
* should be linked together [in]
87
CombineMaskLocations(BlastSeqLoc* mask_loc, BlastSeqLoc* *mask_loc_out,
90
/** This function takes the list of mask locations (i.e., regions that
91
* should not be searched or not added to lookup table) and makes up a set
92
* of SSeqRange*'s in the concatenated sequence built from a set of queries,
93
* that should be searched (that is, takes the complement).
94
* If all sequences in the query set are completely filtered, then an
95
* SSeqRange is created and both of its elements (left and right) are set to
96
* -1 to indicate this.
97
* If any of the mask_loc's is NULL, an SSeqRange for the full span of the
98
* respective query sequence is created.
99
* @param program_number Type of BLAST program [in]
100
* @param query_info The query information structure [in]
101
* @param mask_loc All mask locations [in]
102
* @param complement_mask Linked list of SSeqRange*s in the concatenated
103
* sequence to be indexed in the lookup table. [out]
107
BLAST_ComplementMaskLocations(EBlastProgramType program_number,
108
BlastQueryInfo* query_info, BlastMaskLoc* mask_loc,
109
BlastSeqLoc* *complement_mask);
111
/** Runs filtering functions, according to the string "instructions", on the
112
* SeqLocPtr. Should combine all SeqLocs so they are non-redundant.
113
* @param program_number Type of BLAST program [in]
114
* @param sequence The sequence or part of the sequence to be filtered [in]
115
* @param length Length of the (sub)sequence [in]
116
* @param offset Offset into the full sequence [in]
117
* @param instructions String of instructions to filtering functions. [in]
118
* @param mask_at_hash If TRUE masking is done while making the lookup table
120
* @param seqloc_retval Resulting locations for filtered region. [out]
124
BlastSetUp_Filter(EBlastProgramType program_number,
128
const char* instructions,
129
Boolean *mask_at_hash,
130
BlastSeqLoc* *seqloc_retval);
133
/** Does preparation for filtering and then calls BlastSetUp_Filter
134
* @param query_blk sequence to be filtered [in]
135
* @param query_info info on sequence to be filtered [in]
136
* @param program_number one of blastn, blastp, blastx, etc. [in]
137
* @param filter_string instructions for filtering [in]
138
* @param filter_out resulting locations for filtered region. [out]
139
* @param mask_at_hash If TRUE masking is done while making the lookup table
141
* @param blast_message message that needs to be sent back to user. [out]
145
BlastSetUp_GetFilteringLocations(BLAST_SequenceBlk* query_blk, BlastQueryInfo* query_info,
146
EBlastProgramType program_number, const char* filter_string, BlastMaskLoc** filter_out, Boolean* mask_at_hash,
147
Blast_Message* *blast_message);
149
/** Masks the letters in buffer.
150
* This is a low-level routine and takes a raw buffer which it assumes
151
* to be in ncbistdaa (protein) or blastna (nucleotide).
152
* @param buffer the sequence to be masked (will be modified in place). [in|out]
153
* @param length length of the sequence to be masked. [in]
154
* @param is_na nucleotide if TRUE [in]
155
* @param mask_loc the BlastSeqLoc to use for masking [in]
156
* @param reverse minus strand if TRUE [in]
157
* @param offset how far along the sequence the first residue in buffer is [in]
162
Blast_MaskTheResidues(Uint1 * buffer, Int4 length, Boolean is_na,
163
BlastSeqLoc* mask_loc, Boolean reverse, Int4 offset);
165
/** Masks the sequence given a BlastMaskLoc
166
* @param query_blk sequence to be filtered [in]
167
* @param query_info info on sequence to be filtered [in]
168
* @param filter_maskloc Locations to filter [in]
169
* @param program_number one of blastn, blastp, blastx, etc. [in]
173
BlastSetUp_MaskQuery(BLAST_SequenceBlk* query_blk, BlastQueryInfo* query_info,
174
BlastMaskLoc *filter_maskloc, EBlastProgramType program_number);
180
#endif /* !__BLAST_FILTER__ */