~ubuntu-branches/ubuntu/feisty/ncbi-tools6/feisty

« back to all changes in this revision

Viewing changes to algo/blast/core/blast_tune.c

Committer: Bazaar Package Importer
Author(s): Barry deFreese
Date: 2006-07-19 23:28:07 UTC
mfrom: (1.1.5 upstream)
Revision ID: james.westby@ubuntu.com-20060719232807-et3cdmcjgmnyleyx

Tags: 6.1.20060507-3ubuntu1

Re-merge with Debian

files added:
algo/blast/api/blast_message_api.c

algo/blast/api/blast_message_api.h

algo/blast/composition_adjustment/unified_pvalues.c

algo/blast/composition_adjustment/unified_pvalues.h

algo/blast/core/blast_query_info.c

algo/blast/core/blast_query_info.h

algo/blast/core/blast_tune.c

algo/blast/core/blast_tune.h

connect/ncbi_lb.c

connect/ncbi_lb.h

connect/ncbi_local.c

connect/ncbi_local.h

connect/test/test_assert_impl.h

data/16Score.nhr

data/16Score.nin

data/16Score.nsq

debian/libvibrant6a.install

doc/man/cleanasn.1

doc/man/insdseqget.1

network/wwwblast/Src/Makefile.wblast2_cs

network/wwwblast/Src/showalignwrap.cpp

network/wwwblast/Src/showalignwrap.h

files removed:
debian/libvibrant6.install

files modified:
VERSION

access/ent2api.c

access/ent2api.h

access/makeacc.unx

access/mim.asn

access/pmfapi.c

access/pmfapi.h

algo/blast/api/blast_api.c

algo/blast/api/blast_api.h

algo/blast/api/blast_format.c

algo/blast/api/blast_format.h

algo/blast/api/blast_input.c

algo/blast/api/blast_input.h

algo/blast/api/blast_options_api.c

algo/blast/api/blast_options_api.h

algo/blast/api/blast_returns.c

algo/blast/api/blast_returns.h

algo/blast/api/blast_seq.c

algo/blast/api/blast_seq.h

algo/blast/api/blast_seqalign.c

algo/blast/api/blast_seqalign.h

algo/blast/api/blast_tabular.c

algo/blast/api/blast_tabular.h

algo/blast/api/dust_filter.c

algo/blast/api/repeats_filter.c

algo/blast/api/repeats_filter.h

algo/blast/api/seqsrc_readdb.c

algo/blast/api/twoseq_api.c

algo/blast/api/twoseq_api.h

algo/blast/composition_adjustment/compo_heap.c

algo/blast/composition_adjustment/compo_heap.h

algo/blast/composition_adjustment/compo_mode_condition.c

algo/blast/composition_adjustment/compo_mode_condition.h

algo/blast/composition_adjustment/composition_adjustment.c

algo/blast/composition_adjustment/composition_adjustment.h

algo/blast/composition_adjustment/composition_constants.h

algo/blast/composition_adjustment/matrix_frequency_data.c

algo/blast/composition_adjustment/matrix_frequency_data.h

algo/blast/composition_adjustment/nlm_linear_algebra.c

algo/blast/composition_adjustment/nlm_linear_algebra.h

algo/blast/composition_adjustment/optimize_target_freq.c

algo/blast/composition_adjustment/optimize_target_freq.h

algo/blast/composition_adjustment/redo_alignment.c

algo/blast/composition_adjustment/redo_alignment.h

algo/blast/composition_adjustment/smith_waterman.c

algo/blast/composition_adjustment/smith_waterman.h

algo/blast/core/aa_ungapped.c

algo/blast/core/aa_ungapped.h

algo/blast/core/blast_def.h

algo/blast/core/blast_diagnostics.c

algo/blast/core/blast_diagnostics.h

algo/blast/core/blast_encoding.c

algo/blast/core/blast_encoding.h

algo/blast/core/blast_engine.c

algo/blast/core/blast_engine.h

algo/blast/core/blast_extend.c

algo/blast/core/blast_filter.c

algo/blast/core/blast_gapalign.c

algo/blast/core/blast_gapalign.h

algo/blast/core/blast_hits.c

algo/blast/core/blast_hits.h

algo/blast/core/blast_hits_priv.h

algo/blast/core/blast_inline.h

algo/blast/core/blast_kappa.c

algo/blast/core/blast_kappa.h

algo/blast/core/blast_lookup.c

algo/blast/core/blast_lookup.h

algo/blast/core/blast_message.c

algo/blast/core/blast_message.h

algo/blast/core/blast_options.c

algo/blast/core/blast_options.h

algo/blast/core/blast_parameters.c

algo/blast/core/blast_parameters.h

algo/blast/core/blast_program.c

algo/blast/core/blast_program.h

algo/blast/core/blast_psi.c

algo/blast/core/blast_psi_priv.c

algo/blast/core/blast_psi_priv.h

algo/blast/core/blast_setup.c

algo/blast/core/blast_stat.c

algo/blast/core/blast_stat.h

algo/blast/core/blast_traceback.c

algo/blast/core/blast_traceback.h

algo/blast/core/blast_util.c

algo/blast/core/blast_util.h

algo/blast/core/gapinfo.c

algo/blast/core/gapinfo.h

algo/blast/core/greedy_align.c

algo/blast/core/lookup_util.c

algo/blast/core/lookup_util.h

algo/blast/core/lookup_wrap.c

algo/blast/core/mb_lookup.c

algo/blast/core/mb_lookup.h

algo/blast/core/ncbi_std.h

algo/blast/core/pattern.c

algo/blast/core/pattern.h

algo/blast/core/phi_gapalign.c

algo/blast/core/phi_lookup.c

algo/blast/core/phi_lookup.h

api/alignval.c

api/asn2gnb1.c

api/asn2gnb2.c

api/asn2gnb3.c

api/asn2gnb4.c

api/asn2gnb5.c

api/asn2gnb6.c

api/asn2gnbi.h

api/asn2gnbk.h

api/edutil.c

api/edutil.h

api/explore.h

api/findrepl.c

api/findrepl.h

api/salsap.c

api/salsap.h

api/seqmgr.c

api/seqmgr.h

api/seqport.c

api/sequtil.c

api/sequtil.h

api/sqnutil1.c

api/sqnutil2.c

api/sqnutil3.c

api/sqnutils.h

api/subutil.c

api/subutil.h

api/tofasta.c

api/txalign.c

api/utilpars.c

api/utilpars.h

api/valid.c

api/valid.h

api/valid.msg

api/validerr.h

asn/asn.all

asn/gbseq.asn

asn/insdseq.asn

asn/seq.asn

asnstat/all.h

asnstat/asngbseq.h

asnstat/asninsdseq.h

asnstat/asnmim.h

asnstat/asnseq.h

asnstat/mmdb1.h

biostruc/cdd/cdd.asn

biostruc/cdd/cdd.h

biostruc/cdd/objcdd.c

biostruc/cdd/objcdd.h

biostruc/mmdb1.asn

biostruc/objmmdb1.c

biostruc/objmmdb1.h

checkout.date

connect/ncbi_ansi_ext.c

connect/ncbi_ansi_ext.h

connect/ncbi_assert.h

connect/ncbi_comm.h

connect/ncbi_config.h

connect/ncbi_connection.c

connect/ncbi_connutil.c

connect/ncbi_connutil.h

connect/ncbi_core.c

connect/ncbi_core_c.c

connect/ncbi_dispd.c

connect/ncbi_file_connector.c

connect/ncbi_ftp_connector.c

connect/ncbi_heapmgr.c

connect/ncbi_heapmgr.h

connect/ncbi_host_info.c

connect/ncbi_host_info.h

connect/ncbi_host_infop.h

connect/ncbi_http_connector.c

connect/ncbi_http_connector.h

connect/ncbi_lbsmd.h

connect/ncbi_lbsmd_stub.c

connect/ncbi_memory_connector.c

connect/ncbi_memory_connector.h

connect/ncbi_priv.h

connect/ncbi_sendmail.c

connect/ncbi_server_info.c

connect/ncbi_server_info.h

connect/ncbi_server_infop.h

connect/ncbi_service.c

connect/ncbi_service.h

connect/ncbi_service_connector.c

connect/ncbi_service_connector.h

connect/ncbi_service_misc.h

connect/ncbi_servicep.h

connect/ncbi_socket.c

connect/ncbi_socket.h

connect/ncbi_socket_connector.c

connect/ncbi_types.h

connect/ncbi_util.c

connect/ncbi_util.h

connect/test/test_assert.h

connect/test/test_ncbi_connutil_hit.c

connect/test/test_ncbi_connutil_misc.c

connect/test/test_ncbi_disp.c

connect/test/test_ncbi_dsock.c

connect/test/test_ncbi_heapmgr.c

connect/test/test_ncbi_memory_connector.c

connect/test/test_ncbi_sendmail.c

connect/test/test_ncbi_service_connector.c

connect/test/test_ncbi_socket.c

connect/urlquery.c

connect/urlquery.h

data/seqcode.prt

data/seqcode.val

data/sequin.hlp

debian/changelog

debian/compat

debian/control

debian/libncbi6-dev.install

debian/libncbi6.install

debian/ncbi-tools-bin.install

debian/rules

demo/.BLAST_VERSION

demo/asn2all.c

demo/asn2gb.c

demo/asnval.c

demo/bl2seq.c

demo/blast_driver.c

demo/blastall.c

demo/blastpgp.c

demo/cleanasn.c

demo/copymat.c

demo/fa2htgs.c

demo/formatdb.c

demo/formatrpsdb.c

demo/gi2accn.c

demo/megablast.c

demo/rpsblast.c

demo/scantest.c

demo/seedtop.c

demo/tbl2asn.c

demo/vecscreen.c

desktop/biosrc.c

desktop/bspview.c

desktop/cdrgn.c

desktop/dlgutil1.c

desktop/dlgutil2.c

desktop/dlogutil.h

desktop/e2docsum.c

desktop/gbfview.c

desktop/gphview.c

desktop/import.c

desktop/saledit.h

desktop/salpanel.c

desktop/salpanel.h

desktop/salsa.c

desktop/seqpanel.c

desktop/seqpanel.h

desktop/vsm.c

doc/asn2gb.txt

doc/blast/blastftp.html

doc/blast/netblast.html

doc/blast/scoring.pdf

doc/blast/seedtop.html

doc/blast/web_blast.pl

doc/man/asn2gb.1

doc/man/asnval.1

doc/man/blast.1

doc/man/fa2htgs.1

doc/man/gbseqget.1

doc/man/tbl2asn.1

doc/sequin.htm

errmsg/valid.msg

link/macmet/setrsrc.c

make/makeLibs.met

make/makeall.unx

make/makedis.csh

make/makenet.unx

make/msvc_prj/algo/blast/api/blastapi.dsp

make/msvc_prj/algo/blast/composition_adjustment/blastcompadj.dsp

make/msvc_prj/algo/blast/core/blast.dsp

make/msvc_prj/connect/connect.dsp

make/xCode/NCBI.xcode/project.pbxproj

network/id2arch/id2.asn

network/nsclilib/ni_service.c

network/wwwblast/Src/test/run.pl

network/wwwblast/Src/viewgif.c

network/wwwblast/Src/wblast2.c

object/objbibli.c

object/objcode.c

object/objfeat.c

object/objgbseq.c

object/objgbseq.h

object/objinsdseq.c

object/objinsdseq.h

object/objpub.c

object/objpubd.h

sequin/sbtedit.c

sequin/sequin.h

sequin/sequin1.c

sequin/sequin10.c

sequin/sequin2.c

sequin/sequin3.c

sequin/sequin4.c

sequin/sequin5.c

sequin/sequin6.c

sequin/sequin7.c

sequin/sequin8.c

sequin/sequin9.c

tools/actutils.c

tools/actutils.h

tools/blast.h

tools/blastdef.h

tools/blastkar.c

tools/blastool.c

tools/blastpri.h

tools/blastutl.c

tools/blfmtutl.c

tools/blfmtutl.h

tools/kappa.c

tools/mbalign.c

tools/pseed3.c

tools/readdb.c

tools/readdb.h

tools/salptool.c

tools/salptool.h

tools/taxblast.c

tools/toporg.c

tools/toporg.h

tools/vecscrn.c

util/creaders/alnread.c

vibrant/vibforms.c

Show diffs side-by-side

added added

removed removed

algo/blast/core/blast_tune.c

/* $Id: blast_tune.c,v 1.1 2006/04/19 17:40:28 papadopo Exp $

* ===========================================================================

* PUBLIC DOMAIN NOTICE

* National Center for Biotechnology Information

* This software/database is a "United States Government Work" under the

* terms of the United States Copyright Act. It was written as part of

* the author's official duties as a United States Government employee and

* thus cannot be copyrighted. This software/database is freely available

* to the public for use. The National Library of Medicine and the U.S.

* Government have not placed any restriction on its use or reproduction.

* Although all reasonable efforts have been taken to ensure the accuracy

* and reliability of the software and data, the NLM and the U.S.

* Government do not and cannot warrant the performance or results that

* may be obtained by using this software or data. The NLM and the U.S.

* Government disclaim all warranties, express or implied, including

* warranties of performance, merchantability or fitness for any particular

* purpose.

* Please cite the author in any work or product based on this material.

* ===========================================================================

* Author: Jason Papadopoulos
*
*/

/** @file blast_tune.c
* Routines that compute a blastn word size appropriate for finding,
* with high probability, alignments with specified length and
* percent identity.
*/

#ifndef SKIP_DOXYGEN_PROCESSING
/* Revision-control "$Id$" keyword string identifying this version of the
   file; it is left out when SKIP_DOXYGEN_PROCESSING is defined. */
static char const rcsid[] =
"$Id: blast_tune.c,v 1.1 2006/04/19 17:40:28 papadopo Exp $";
#endif /* SKIP_DOXYGEN_PROCESSING */

#include <algo/blast/core/blast_def.h>

#include <algo/blast/core/blast_tune.h>

/** Structure containing intermediate data to be processed: the two
    square scratch matrices used by the Markov chain calculation, their
    allocated and current dimensions, and the input and result of the
    most recent hit-probability computation */
typedef struct MatrixData {
    Int4 matrix_dim_alloc;   /**< max matrix size, i.e. the largest
                                  dimension the two matrices below
                                  have been allocated for */
    Int4 matrix_dim;         /**< current matrix size (the word size
                                  being tested, plus one) */
    double hit_probability;  /**< for the current Markov chain, the
                                  probability that blastn will find
                                  a hit of specified length with
                                  specified identity */
    double percent_identity; /**< the target percent identity, used
                                  to choose the blastn word size;
                                  stored as a fraction */
    double *power_matrix;    /**< space for iterated Markov chain;
                                  matrix_dim x matrix_dim, row-major */
    double *prod_matrix;     /**< scratch space for matrix multiply;
                                  same layout as power_matrix */
} MatrixData;

/** The probability with which a random alignment must be found.
    Given particulars about the alignment, the routines below attempt
    to compute the largest blastn word size that has at least
    this probability of finding a random alignment */
#define TARGET_HIT_PROB 0.98

/** initialize intermediate state. Note that memory for

* the matrices gets allocated later.

* @param m pointer to intermediate state [in][out]

* @return -1 if m is NULL, zero otherwise

static Int2 s_MatrixDataInit(MatrixData *m)

{

if (m == NULL)

return -1;

memset(m, 0, sizeof(MatrixData));

return 0;

}

/** Free previously allocated scratch data

* @param m pointer to intermediate state [in][out]

static void s_MatrixDataFree(MatrixData *m)

{

if (m != NULL) {

sfree(m->power_matrix);

sfree(m->prod_matrix);

}

/** Set up for the next calculation of hit probability.

* @param m Space for the Markov chain calculation [in][out]

* @param new_word_size The blastn word size to be used

* for the current test. The internally generated

* matrix has dimension one larger than this [in]

* @param percent_identity The desired amount of identity in

* alignments. A fractional number (0...1) [in]

* @return 0 if successful

static Int2 s_MatrixDataReset(MatrixData *m,

Int4 new_word_size,

100

double percent_identity)

101

{

102

if (m == NULL)

103

return -1;

104

105

m->hit_probability = 0.0;

106

m->percent_identity = percent_identity;

107

m->matrix_dim = new_word_size + 1;

108

109

/* reallocate the two scratch matrices only if the new

110

matrix dimension exceeds the amount of space previously

111

allocated */

112

if (m->matrix_dim > m->matrix_dim_alloc) {

113

114

Int4 num_cells = m->matrix_dim * m->matrix_dim;

115

m->matrix_dim_alloc = m->matrix_dim;

116

m->power_matrix = (double *)realloc(m->power_matrix,

117

num_cells * sizeof(double));

118

m->prod_matrix = (double *)realloc(m->prod_matrix,

119

num_cells * sizeof(double));

120

121

if (m->power_matrix == NULL || m->prod_matrix == NULL) {

122

sfree(m->power_matrix);

123

sfree(m->prod_matrix);

124

return -2;

125

}

126

}

127

return 0;

128

}

129

130

/** Loads the initial value for matrix exponentiation. This is

131

* the starting Markov chain described in the reference.

132

* @param matrix The matrix to be initialized [in][out]

133

* @param matrix_dim Dimension of the matrix [in]

134

* @param identity The desired amount of identity in

135

* alignments. A fractional number (0...1) [in]

136

137

static void s_SetInitialMatrix(double *matrix,

138

Int4 matrix_dim,

139

double identity)

140

{

141

Int4 i;

142

double *row;

143

144

memset(matrix, 0, matrix_dim * matrix_dim * sizeof(double));

145

146

for (i = 0, row = matrix; i < matrix_dim - 1;

147

i++, row += matrix_dim) {

148

row[0] = 1.0 - identity;

149

row[i+1] = identity;

150

}

151

row[i] = 1.0;

152

}

153

154

/** Multiply the current exponentiated matrix by the original

155

* state transition matrix. Since the latter is very sparse and

156

* has a regular structure, this operation is essentially

157

* instantaneous compared to an ordinary matrix-matrix multiply

158

* @param a Matrix to multiply [in]

159

* @param identity The desired amount of identity in

160

* alignments. A fractional number (0...1). Note that

161

* this is the only information needed to create the

162

* state transition matrix, and its structure is sufficiently

163

* regular that the matrix can be implicitly used [in]

164

* @param prod space for the matrix product [out]

165

* @param dim The dimension of all matrices [in]

166

167

static void s_MatrixMultiply(double *a,

168

double identity,

169

double *prod, Int4 dim)

170

{

171

Int4 i, j;

172

double *prod_row;

173

double *a_row;

174

double comp_identity = 1.0 - identity;

175

176

/* compute the first column of the product */

177

a_row = a;

178

prod_row = prod;

179

for (i = 0; i < dim; i++) {

180

181

double accum = 0;

182

for (j = 0; j < dim - 1; j++)

183

accum += a_row[j];

184

185

prod_row[0] = comp_identity * accum;

186

a_row += dim;

187

prod_row += dim;

188

}

189

190

/* computed the second to the last columns */

191

a_row = a;

192

prod_row = prod;

193

for (i = 0; i < dim; i++) {

194

for (j = 1; j < dim; j++) {

195

prod_row[j] = identity * a_row[j-1];

196

}

197

a_row += dim;

198

prod_row += dim;

199

}

200

201

/* modify the last column slightly */

202

a_row = a + dim - 1;

203

prod_row = prod + dim - 1;

204

for (i = 0; i < dim; i++) {

205

prod_row[0] += a_row[0];

206

a_row += dim;

207

prod_row += dim;

208

}

209

}

210

211

/** Multiply a square matrix by itself

212

* @param a The matrix [in]

213

* @param prod Space to store the product [out]

214

* @param dim The matrix dimesnion [in]

215

216

static void s_MatrixSquare(double *a, double *prod, Int4 dim)

217

{

218

Int4 i, j, k;

219

double *prod_row = prod;

220

double *a_row = a;

221

Int4 full_entries = dim & ~3;

222

223

/* matrix multiplication is probably the most heavily

224

studied computational problem, and there are many

225

high-quality implementations for computing matrix

226

products. All of them 1) are enormously faster than

227

this implementation, 2) are far more complicated than

228

is practical, 3) are optimized for matrix sizes much

229

larger than are dealt with here, and 4) are not worth

230

adding a dependency on a BLAS implementation just for

231

this application. The following is 'fast enough' */

232

233

for (i = 0; i < dim; i++, prod_row += dim, a_row += dim) {

234

235

for (j = 0; j < dim; j++) {

236

237

double *a_col = a + j;

238

double accum = 0;

239

for (k = 0; k < full_entries; k += 4, a_col += 4 * dim) {

240

accum += a_row[k] * a_col[0] +

241

a_row[k+1] * a_col[dim] +

242

a_row[k+2] * a_col[2*dim] +

243

a_row[k+3] * a_col[3*dim];

244

}

245

for (; k < dim; k++, a_col += dim) {

246

accum += a_row[k] * a_col[0];

247

}

248

249

prod_row[j] = accum;

250

}

251

}

252

}

253

254

/** Swap two matrices by swapping the pointers to them. Both
    arguments must be modifiable lvalues of type double*. The body
    is wrapped in do { } while (0) so that the macro, followed by a
    semicolon, behaves as a single statement (for instance in an
    if/else without braces) */
#define SWAP_MATRIX(a,b) do {       \
        double *tmp_ = (a);         \
        (a) = (b);                  \
        (b) = tmp_;                 \
    } while (0)

260

261

/** For fixed word size and alignment properties, compute

262

* the probability that blastn with that word size will

263

* find a seed within a random alignment.

264

* @param m Space for the Markov chain calculation [in][out]

265

* @param word_size The blastn word size [in]

266

* @param min_percent_identity How much identity is expected in

267

* random alignments. Less identity means the probability of

268

* finding such alignments is decreased [in]

269

* @param min_align_length The smallest alignment length desired.

270

* Longer length gives blastn more leeway to find seeds

271

* and increases the computed probability that alignments

272

* will be found [in]

273

* @return 0 if the probability was successfully computed

274

275

static Int2 s_FindHitProbability(MatrixData *m,

276

Int4 word_size,

277

double min_percent_identity,

278

Int4 min_align_length)

279

{

280

Uint4 mask;

281

Int4 num_squares = 0;

282

283

if (min_align_length == 0)

284

return -3;

285

286

if (s_MatrixDataReset(m, word_size, min_percent_identity))

287

return -4;

288

289

/* initialize the matrix of state transitions */

290

s_SetInitialMatrix(m->power_matrix, m->matrix_dim,

291

min_percent_identity);

292

293

/* Exponentiate the starting matrix. The probability desired

294

is the top right entry of the resulting matrix. Use left-to-

295

right binary exponentiation, since this allows the original

296

(very sparse) transition matrix to be used throughout the

297

exponentiation process */

298

299

mask = (Uint4)(0x80000000);

300

while (!(min_align_length & mask))

301

mask = mask / 2;

302

303

for (mask = mask / 2, num_squares = 0; mask;

304

mask = mask / 2, num_squares++) {

305

306

if (num_squares == 0)

307

s_MatrixMultiply(m->power_matrix, m->percent_identity,

308

m->prod_matrix, m->matrix_dim);

309

else

310

s_MatrixSquare(m->power_matrix, m->prod_matrix, m->matrix_dim);

311

SWAP_MATRIX(m->prod_matrix, m->power_matrix);

312

313

if (min_align_length & mask) {

314

s_MatrixMultiply(m->power_matrix, m->percent_identity,

315

m->prod_matrix, m->matrix_dim);

316

SWAP_MATRIX(m->prod_matrix, m->power_matrix);

317

}

318

}

319

320

m->hit_probability = m->power_matrix[m->matrix_dim - 1];

321

return 0;

322

}

323

324

325

/** For specified alignment properties, compute the blastn word size

326

* that will cause random alignments with those properties to be

327

* found with specified (high) probability.

328

* @param m Space for the Markov chain calculation [in][out]

329

* @param min_percent_identity How much identity is expected in

330

* random alignments [in]

331

* @param min_align_length The smallest alignment length desired [in]

332

* @return The optimal word size, or zero if the optimization

333

* process failed

334

335

static Int4 s_FindWordSize(MatrixData *m,

336

double min_percent_identity,

337

Int4 min_align_length)

338

{

339

const double k_min_w = 4; /* minimum acceptable word size */

340

const double k_max_w = 110; /* maximum acceptable word size */

341

double w0, p0;

342

double w1, p1;

343

344

/* we treat the optimization problem as an exercise in

345

rootfinding, and use bisection. Bisection is appropriate

346

here because the root does not need to be found to

347

high accuracy (since the final word size must be an

348

integer) and because the function described by

349

s_FindHitProbability is monotonically decreasing but

350

can drop off very sharply, i.e. can still be badly behaved.

351

352

Begin by bracketing the target probability. The initial range

353

should be appropriate for common searches */

354

355

w1 = 28.0;

356

if (s_FindHitProbability(m, (Int4)(w1 + 0.5),

357

min_percent_identity,

358

min_align_length) != 0) {

359

return 0;

360

}

361

p1 = m->hit_probability - TARGET_HIT_PROB;

362

363

w0 = 11.0;

364

if (s_FindHitProbability(m, (Int4)(w0 + 0.5),

365

min_percent_identity,

366

min_align_length) != 0) {

367

return 0;

368

}

369

p0 = m->hit_probability - TARGET_HIT_PROB;

370

371

/* modify the initial range if it does not bracket the

372

target probability */

373

if (p1 > 0) {

374

375

/* push the range to the right. Progressively double

376

the word size until the root is bracketed or the

377

maximum word size is reached */

378

379

while (p1 > 0 && w1 < k_max_w) {

380

w0 = w1; p0 = p1;

381

w1 = MIN(2 * w1, k_max_w);

382

if (s_FindHitProbability(m, (Int4)(w1 + 0.5),

383

min_percent_identity,

384

min_align_length) != 0) {

385

return 0;

386

}

387

p1 = m->hit_probability - TARGET_HIT_PROB;

388

}

389

390

/* if the root is still not bracketed, return the

391

largest possible word size */

392

393

if (p1 > 0)

394

return (Int4)(w1 + 0.5);

395

}

396

else if (p0 < 0) {

397

398

/* push the range to the left. The smallest word size

399

is reached much sooner, so choose it immediately */

400

401

w1 = w0; p1 = p0;

402

w0 = k_min_w;

403

if (s_FindHitProbability(m, (Int4)(w0 + 0.5),

404

min_percent_identity,

405

min_align_length) != 0) {

406

return 0;

407

}

408

p0 = m->hit_probability - TARGET_HIT_PROB;

409

410

/* and return that word size if it's still not enough */

411

if (p0 < 0)

412

return (Int4)(w0 + 0.5);

413

}

414

415

/* bisect the initial range until the bounds have

416

converged to each other */

417

while (fabs(w1 - w0) > 1) {

418

double p2, w2 = (w0 + w1) / 2;

419

420

if (s_FindHitProbability(m, (Int4)(w2 + 0.5),

421

min_percent_identity,

422

min_align_length) != 0) {

423

return 0;

424

}

425

p2 = m->hit_probability - TARGET_HIT_PROB;

426

427

if (p2 > 0.0) {

428

w0 = w2; p0 = p2;

429

}

430

else {

431

w1 = w2; p1 = p2;

432

}

433

}

434

435

/* conservatively return the lower bound, since that gives

436

a more accurate word size */

437

return (Int4)(w0 + 0.5);

438

}

439

440

/* see blast_tune.h */

441

Int4 BLAST_FindBestNucleotideWordSize(double min_percent_identity,

442

Int4 min_align_length)

443

{

444

MatrixData m;

445

Int4 retval;

446

447

/* perform sanity checks */

448

449

if (min_percent_identity >= 1.0 || min_percent_identity < 0.6)

450

return 0;

451

452

if (min_align_length > 10000)

453

min_align_length = 10000;

454

else if (min_align_length < 0)

455

return 0;

456

else if (min_align_length < 8)

457

return 4;

458

459

/* find the best word size */

460

s_MatrixDataInit(&m);

461

retval = s_FindWordSize(&m, min_percent_identity,

462

min_align_length);

463

s_MatrixDataFree(&m);

464

return retval;

465

}

Older »