31
31
* Structures and API used for saving BLAST hits
34
#ifndef __BLAST_HITS__
35
#define __BLAST_HITS__
34
#ifndef ALGO_BLAST_CORE__BLAST_HITS__H
35
#define ALGO_BLAST_CORE__BLAST_HITS__H
37
#include <algo/blast/core/ncbi_std.h>
38
#include <algo/blast/core/blast_export.h>
39
#include <algo/blast/core/blast_program.h>
40
#include <algo/blast/core/blast_query_info.h>
41
#include <algo/blast/core/blast_options.h>
37
42
#include <algo/blast/core/blast_parameters.h>
43
#include <algo/blast/core/blast_stat.h>
38
44
#include <algo/blast/core/gapinfo.h>
39
45
#include <algo/blast/core/blast_seqsrc.h>
40
46
#include <algo/blast/core/pattern.h>
257
263
const BlastInitialWordParameters* word_params,
258
264
BlastScoreBlk* sbp, Boolean translated);
260
/** Calculate number of identities in an HSP.
261
* @param query The query sequence [in]
262
* @param subject The uncompressed subject sequence [in]
263
* @param hsp All information about the HSP [in]
264
* @param num_ident_ptr Number of identities [out]
265
* @param align_length_ptr The alignment length, including gaps [out]
269
Blast_HSPGetNumIdentities(Uint1* query, Uint1* subject, BlastHSP* hsp,
270
Int4* num_ident_ptr, Int4* align_length_ptr);
272
/** Calculate number of identities in an HSP for an out-of-frame alignment.
273
* @param query The query sequence [in]
274
* @param subject The uncompressed subject sequence [in]
275
* @param hsp All information about the HSP [in]
276
* @param program BLAST program (blastx or tblastn) [in]
277
* @param num_ident_ptr Number of identities [out]
278
* @param align_length_ptr The alignment length, including gaps [out]
282
Blast_HSPGetOOFNumIdentities(Uint1* query, Uint1* subject, BlastHSP* hsp,
283
EBlastProgramType program, Int4* num_ident_ptr,
284
Int4* align_length_ptr);
286
/** Calculates number of identities and alignment lengths of an HSP and
287
* determines whether this HSP should be kept or deleted. The num_ident
288
* field of the BlastHSP structure is filled here.
266
/** Calculate number of identities in an HSP and set the BlastHSP::num_ident
267
* field (unconditionally)
268
* @param query The query sequence [in]
269
* @param subject The uncompressed subject sequence [in]
270
* @param hsp All information about the HSP, the output of this function will
271
* be stored in its num_ident field [in|out]
272
* @param score_options Scoring options [in]
273
* @param align_length_ptr The alignment length, including gaps (optional) [out]
274
* @return 0 on success, -1 on invalid parameters or error
278
Blast_HSPGetNumIdentities(const Uint1* query,
279
const Uint1* subject,
281
const BlastScoringOptions* score_options,
282
Int4* align_length_ptr);
284
/** Calculates number of identities and alignment lengths of an HSP via
285
* Blast_HSPGetNumIdentities and determines whether this HSP should be kept or
289
287
* @param program_number Type of BLAST program [in]
290
288
* @param hsp An HSP structure [in] [out]
291
289
* @param query Query sequence [in]
393
391
Blast_HSPList_IsEmpty(const BlastHSPList* hsp_list);
393
/** Returns a duplicate (deep copy) of the given hsp list.
 * @param hsp_list HSP list to duplicate; it is not modified [in]
 * @return The newly created copy of hsp_list.
 * NOTE(review): the behavior for a NULL hsp_list and the routine that must be
 * used to free the copy are not visible from this declaration -- confirm
 * against the implementation.
 */
394
BlastHSPList* BlastHSPListDup(const BlastHSPList* hsp_list);
396
/** Swaps the two HSP lists via structure assignment
 * @param list1 First HSP list; holds the former contents of list2 on
 *              return [in] [out]
 * @param list2 Second HSP list; holds the former contents of list1 on
 *              return [in] [out]
 */
397
void Blast_HSPListSwap(BlastHSPList* list1, BlastHSPList* list2);
395
399
/** Saves HSP information into a BlastHSPList structure
396
400
* @param hsp_list Structure holding all HSPs with full gapped alignment
397
401
* information [in] [out]
481
485
Blast_HSPListPurgeHSPsWithCommonEndpoints(EBlastProgramType program,
482
486
BlastHSPList* hsp_list);
484
/** Reevaluate all HSPs in an HSP list, using ambiguity information.
488
/** Reevaluate all ungapped HSPs in an HSP list, using ambiguity information.
485
489
* This can only be done either for an ungapped search, or if traceback is
486
490
* already available.
487
491
* Subject sequence is uncompressed and saved here. Number of identities is
504
508
NCBI_XBLAST_EXPORT
506
Blast_HSPListReevaluateWithAmbiguities(EBlastProgramType program,
510
Blast_HSPListReevaluateWithAmbiguitiesUngapped(EBlastProgramType program,
507
511
BlastHSPList* hsp_list, BLAST_SequenceBlk* query_blk,
508
512
BLAST_SequenceBlk* subject_blk,
509
513
const BlastInitialWordParameters* word_params,
528
532
* @param hsp_list Contains HSPs from the new chunk [in]
529
533
* @param combined_hsp_list_ptr Contains HSPs from previous chunks [in] [out]
530
534
* @param hsp_num_max Maximal allowed number of HSPs to save (unlimited if INT4_MAX) [in]
531
* @param start Offset where the current subject chunk starts [in]
532
* @param merge_hsps Should the overlapping HSPs be merged into one? [in]
535
* @param split_points The sequence offset (query or subject) that is
536
* the boundary between HSPs in combined_hsp_list and hsp_list. [in]
537
* @param contexts_per_query If positive, the number of query contexts
538
* that hits can contain. If negative, the (one) split
539
* point occurs on the subject sequence [in]
540
* @param chunk_overlap_size The length of the overlap region between the
541
* sequence region containing hsp_list and that
542
* containing combined_hsp_list [in]
533
543
* @return 0 if HSP lists have been merged successfully, -1 otherwise.
535
545
NCBI_XBLAST_EXPORT
536
546
Int2 Blast_HSPListsMerge(BlastHSPList** hsp_list,
537
547
BlastHSPList** combined_hsp_list_ptr,
538
Int4 hsp_num_max, Int4 start, Boolean merge_hsps);
548
Int4 hsp_num_max, Int4* split_points,
549
Int4 contexts_per_query,
550
Int4 chunk_overlap_size);
540
552
/** Adjust subject offsets in an HSP list if only part of the subject sequence
541
553
* was searched. Used when long subject sequence is split into more manageable
616
628
NCBI_XBLAST_EXPORT
617
629
Int2 Blast_HitListUpdate(BlastHitList* hit_list, BlastHSPList* hsp_list);
631
/** Combine two hitlists; both HitLists must contain HSPs that
632
* represent alignments to the same query sequence
633
* @param old_hit_list_ptr Pointer to original HitList, will be NULLed
634
* out on return [in|out]
635
* @param combined_hit_list_ptr Pointer to the combined HitList [in|out]
636
* @param contexts_per_query The number of different contexts that can
637
* occur in hits from old_hit_list and combined_hit_list [in]
638
* @param split_offsets the query offset that marks the boundary between
639
* combined_hit_list and old_hit_list. HSPs in old_hit_list
640
* that hit to context i are assumed to lie to the right
641
* of split_offsets[i] [in]
642
* @param chunk_overlap_size The length of the overlap region between the
643
* sequence region containing old_hit_list and that
644
* containing combined_hit_list [in]
647
Int2 Blast_HitListMerge(BlastHitList** old_hit_list_ptr,
648
BlastHitList** combined_hit_list_ptr,
649
Int4 contexts_per_query, Int4 *split_offsets,
650
Int4 chunk_overlap_size);
619
652
/** Purges a BlastHitList of NULL HSP lists.
620
653
* @param hit_list BLAST hit list to purge. [in] [out]