~ubuntu-branches/ubuntu/oneiric/ncbi-tools6/oneiric

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
/*  $Id: blast_tune.h,v 1.3 2006/11/21 16:47:16 papadopo Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author:  Jason Papadopoulos
 *
 */

/** @file blast_tune.h
 * Compute a blastn word size appropriate for finding,
 * with high probability, alignments with specified length and 
 * percent identity.
 */

#ifndef ALGO_BLAST_CORE___BLAST_TUNE__H
#define ALGO_BLAST_CORE___BLAST_TUNE__H

#include <algo/blast/core/ncbi_std.h>
#include <algo/blast/core/blast_export.h>

/** @addtogroup AlgoBlast
 *
 * @{
 */

#ifdef __cplusplus
extern "C" {
#endif

/** Choose a blastn word size suited to a target alignment length and
 *  identity.
 *
 *  Given a minimum amount of identity and the minimum desired length
 *  of nucleotide alignments, find the largest blastn word size that
 *  will find random instances of those alignments with high
 *  probability.
 *
 *  The result is a tuning aid, not a filter: when blast is actually
 *  run, it is still possible to find alignments that are shorter
 *  and/or have less identity than what is specified here. The returned
 *  word size is just a choice that makes it unlikely that ungapped
 *  blast will miss alignments that exceed *both* minimums.
 *
 *  The algorithm used is described in
 *
 *  <PRE>
 *  Valer Gotea, Vamsi Veeramachaneni, and Wojciech Makalowski
 *  "Mastering seeds for genomic size nucleotide BLAST searches"
 *  Nucleic Acids Research, 2003, Vol 31, No. 23, pp 6935-6941
 *  </PRE>
 *
 * @param min_percent_identity How much identity is expected in
 *              random alignments. Less identity means the probability
 *              of finding such alignments is decreased.
 *              NOTE(review): this declaration does not state whether
 *              the value is a fraction or a percentage -- confirm
 *              against the implementation before relying on either [in]
 * @param min_align_length The smallest alignment length desired.
 *              Longer length gives blastn more leeway to find seeds
 *              and increases the computed probability that alignments
 *              will be found [in]
 * @return The optimal word size, or zero if the optimization
 *         process failed. Callers should treat zero as "no word size
 *         could be chosen" rather than as a usable word size.
 */
NCBI_XBLAST_EXPORT
Int4 BLAST_FindBestNucleotideWordSize(double min_percent_identity,
                                      Int4 min_align_length);

#ifdef __cplusplus
}
#endif

/** @} */

#endif  /* ALGO_BLAST_CORE___BLAST_TUNE__H */