1
/* $Id: hspfilter_besthit.h,v 1.2 2009/06/01 16:04:31 kazimird Exp $
2
* ===========================================================================
5
* National Center for Biotechnology Information
7
* This software/database is a "United States Government Work" under the
8
* terms of the United States Copyright Act. It was written as part of
9
* the author's official duties as a United States Government employee and
10
* thus cannot be copyrighted. This software/database is freely available
11
* to the public for use. The National Library of Medicine and the U.S.
12
* Government have not placed any restriction on its use or reproduction.
14
* Although all reasonable efforts have been taken to ensure the accuracy
15
* and reliability of the software and data, the NLM and the U.S.
16
* Government do not and cannot warrant the performance or results that
17
* may be obtained by using this software or data. The NLM and the U.S.
18
* Government disclaim all warranties, express or implied, including
19
* warranties of performance, merchantability or fitness for any particular
* purpose.
*
* Please cite the author in any work or product based on this material.
24
* ===========================================================================
*/
/** @file hspfilter_besthit.h
* Implementation of a number of BlastHSPWriters to save hits from
* a BLAST search, and subsequently return them in sorted order.
*/
#ifndef ALGO_BLAST_CORE__HSPFILTER_BESTHIT__H
36
#define ALGO_BLAST_CORE__HSPFILTER_BESTHIT__H
38
#include <algo/blast/core/ncbi_std.h>
39
#include <algo/blast/core/blast_program.h>
40
#include <algo/blast/core/blast_options.h>
41
#include <algo/blast/core/blast_hspfilter.h>
42
#include <algo/blast/core/blast_hits.h>
43
#include <connect/ncbi_core.h>
49
/************************************************************************/
/** The "best hit" writer

   Prunes the hsp_list for each query and keeps only the best ones.

   1. For a pair of hits A and B, check based on 10% overhangs whether
      A can be dropped because of B due to end points of A being within
      10% extension of B and vice versa. Note that this would allow A
      to be dropped even if it is at most 20% longer than B.

   2. If A can be dropped because of B, check if Evalue(A) >= Evalue(B);
      that is, A has the same or worse evalue than B. Do the same check for
      whether B can be dropped because of A.

   3. If A can still be dropped because of B, check if density(A) <= density(B).
      Do the same check for whether B can be dropped because of A.

   4. If only one can be dropped, then drop that one. If both are mutually
      replaceable, use length criteria and drop the shorter one
      only if it is at least 10% shorter (90% coverage).

   So, essentially length coverage is being used as a tie-breaker and if the
   tie-breaker does not break the tie, both alignments are kept. The above is
   not very different from what you have now, just rearranged in conditions
   so that we do not have non-deterministic behavior between a pair of
   alignments. We could still have issues with cascades where A was dropped
   because of B and then B gets dropped because of C, but A would not have
   been dropped because of C because of condition 4. However, I think this
   will be extremely rare.
*/
/* Defaults and allowed ranges for the two tunable fractions of the best-hit
 * filter: `overhang` (used in condition 1 of the algorithm) and `score_edge`
 * (the score margin used in condition 4). */

/** Default value for overhang */
#define kBestHit_OverhangDflt 0.1
/** Minimum value for overhang */
#define kBestHit_OverhangMin 0.0
/** Maximum value for overhang */
#define kBestHit_OverhangMax 0.5

/** Default value for score_edge */
#define kBestHit_ScoreEdgeDflt 0.1
/** Minimum value for score_edge */
#define kBestHit_ScoreEdgeMin 0.0
/** Maximum value for score_edge */
#define kBestHit_ScoreEdgeMax 0.5

/** Keeps parameters used in best hit algorithm.*/
94
typedef struct BlastHSPBestHitParams {
95
EBlastProgramType program;/**< program type. */
96
Int4 prelim_hitlist_size; /**< number of hits saved during preliminary
98
double overhang; /**< overhang used in condition 1. */
99
double score_edge; /**< fraction of score margin in condition 4*/
100
} BlastHSPBestHitParams;
102
/** create a set of parameters
103
* @param program Blast program type.[in]
104
* @param hit_options field hitlist_size and hsp_num_max needed, a pointer to
105
* this structure will be stored on resulting structure.[in]
106
* @param overhang Specifies the ratio of overhang to length, which is used to
107
determine if hit A is contained in hit B
108
* @return the pointer to the allocated parameter
111
BlastHSPBestHitParams*
112
BlastHSPBestHitParamsNew(const BlastHitSavingOptions* hit_options,
113
const BlastHSPBestHitOptions* best_hit_opts);
115
/** Deallocates the BlastHSPBestHitParams structure passed in
116
* @param opts structure to deallocate [in]
120
BlastHSPBestHitParams*
121
BlastHSPBestHitParamsFree(BlastHSPBestHitParams* opts);
123
/** WriterInfo and PipeInfo to create a best hit writer/pipe
124
* @param params Specifies writer parameters. [in]
125
* @return the newly allocated writer/pipe info
129
BlastHSPBestHitInfoNew(BlastHSPBestHitParams* params);
133
BlastHSPBestHitPipeInfoNew(BlastHSPBestHitParams* params);
139
#endif /* !ALGO_BLAST_CORE__HSPFILTER_BESTHIT__H */