111
118
NULL, NULL, NULL, TRUE, 'd', ARG_STRING, 0.0, 0, NULL}, /* ARG_DB */
112
119
{ "Query File", /* ARG_QUERY */
113
120
"stdin", NULL, NULL, FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
114
{ "Subject File (for two sequences comparison", /* ARG_SUBJECT */
121
{ "Query location offsets; format: \"start stop\";\n"
122
"Applies only if query file contains 1 sequence", /* ARG_QUERY_LOC */
123
NULL, NULL, NULL, TRUE, 'I', ARG_STRING, 0.0, 0, NULL},
124
{ "Subject File (for sequence sets comparison)", /* ARG_SUBJECT */
115
125
"stdin", NULL, NULL, FALSE, 'j', ARG_FILE_IN, 0.0, 0, NULL},
126
{ "Subject location offsets; format: \"start stop\";\n"/* ARG_SUBJECT_LOC */
127
"Applies only if subject file (-j) contains 1 sequence",
128
NULL, NULL, NULL, TRUE, 'J', ARG_STRING, 0.0, 0, NULL},
116
129
{ "Query strands to search against database: 0 or 3 is both, 1 is top, "
117
130
"2 is bottom", /* ARG_STRAND */
118
131
"0", NULL, NULL, FALSE, 'S', ARG_INT, 0.0, 0, NULL},
181
194
{"Identity percentage cut-off", /* ARG_PERC_IDENT */
182
195
"0", NULL, NULL, FALSE, 'P', ARG_FLOAT, 0.0, 0, NULL},
183
196
{ "Longest intron length for uneven gap HSP linking (tblastn only)",
184
"0", NULL, NULL, FALSE, 'I', ARG_INT, 0.0, 0, NULL}, /* ARG_INTRON */
197
"0", NULL, NULL, FALSE, 'z', ARG_INT, 0.0, 0, NULL}, /* ARG_INTRON */
185
198
{ "Number of database sequences to show one-line descriptions for (V)",
186
199
"500", NULL, NULL, FALSE, 'v', ARG_INT, 0.0, 0, NULL}, /* ARG_DESCRIPTIONS */
187
200
{ "Number of database sequence to show alignments for (B)", /* ARG_ALIGNMENTS */
204
217
"Format: \"oid1 oid2\"; ',', ':' or ';' can also be used as delimiters\n"
205
218
"Full database is searched if range not provided.", /* ARG_OIDRANGE */
206
219
NULL, NULL, NULL, TRUE, 'R', ARG_STRING, 0.0, 0, NULL},
207
{ "Produce on-the-fly tabular output",
208
"0", NULL, NULL, FALSE, 'B', ARG_INT, 0.0, 0, NULL} /* ARG_TABULAR */
220
{ "Produce on-the-fly tabular output; 1 - just offsets and quality values;\n"
221
"2 - add sequence data.",
222
"0", NULL, NULL, FALSE, 'B', ARG_INT, 0.0, 0, NULL}, /* ARG_TABULAR */
223
{ "Number of threads to use in preliminary search stage",
224
"1", NULL, NULL, FALSE, 'a', ARG_INT, 0.0, 0, NULL}, /* ARG_THREADS */
225
{ "Show gis in sequence ids",
226
"F", NULL, NULL, FALSE, 'n', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_SHOWGI */
227
{ "Show only accessions for sequence ids in tabular output",
228
"F", NULL, NULL, FALSE, 'N', ARG_BOOLEAN, 0.0, 0, NULL}/* ARG_ACCESSION */
231
extern void PrintTabularOutputHeader PROTO((char* blast_database,
232
BioseqPtr query_bsp, SeqLocPtr query_slp, char* blast_program,
233
Int4 iteration, Boolean believe_query, FILE *outfp));
211
235
static Int2 BLAST_FillRPSInfo( RPSInfo **ppinfo, Nlm_MemMap **rps_mmap,
212
236
Nlm_MemMap **rps_pssm_mmap, CharPtr dbname )
364
387
if (myargs[ARG_PHI].strvalue) {
365
388
lookup_options->phi_pattern = strdup(myargs[ARG_PHI].strvalue);
366
389
lookup_options->lut_type =
367
((program_number == blast_type_blastn) ?
390
((program_number == eBlastTypeBlastn) ?
368
391
PHI_NA_LOOKUP : PHI_AA_LOOKUP);
369
392
hit_options->phi_align = TRUE;
372
395
BLAST_FillQuerySetUpOptions(query_setup_options, program_number,
373
myargs[ARG_FILTER].strvalue, myargs[ARG_STRAND].intvalue);
396
myargs[ARG_FILTER].strvalue, (Uint1)myargs[ARG_STRAND].intvalue);
375
398
if (myargs[ARG_GENCODE].intvalue &&
376
(program_number == blast_type_blastx ||
377
program_number == blast_type_tblastx))
399
(program_number == eBlastTypeBlastx ||
400
program_number == eBlastTypeTblastx))
378
401
query_setup_options->genetic_code = myargs[ARG_GENCODE].intvalue;
380
403
BLAST_FillInitialWordOptions(word_options, program_number,
381
(greedy_extension && !greedy_with_ungapped),
404
(Boolean)(greedy_extension && !greedy_with_ungapped),
382
405
myargs[ARG_WINDOW].intvalue, variable_wordsize, ag_blast, mb_lookup,
383
406
myargs[ARG_XDROP_UNGAPPED].intvalue);
385
408
BLAST_FillExtensionOptions(ext_options, program_number, greedy_extension,
386
409
myargs[ARG_XDROP].intvalue, myargs[ARG_XDROP_FINAL].intvalue);
388
if (program_number == blast_type_rpsblast ||
389
program_number == blast_type_rpstblastn) {
411
if (program_number == eBlastTypeRpsBlast ||
412
program_number == eBlastTypeRpsTblastn) {
390
413
BLAST_FillScoringOptions(score_options, program_number, FALSE,
391
414
myargs[ARG_MISMATCH].intvalue, myargs[ARG_MATCH].intvalue,
392
"BLOSUM62", rps_info->aux_info.gap_open_penalty,
415
rps_info->aux_info.orig_score_matrix,
416
rps_info->aux_info.gap_open_penalty,
393
417
rps_info->aux_info.gap_extend_penalty);
395
419
BLAST_FillScoringOptions(score_options, program_number,
493
554
ErrSetMessageLevel(SEV_WARNING);
495
if ((outfp = FileOpen(myargs[ARG_OUT].strvalue, "w")) == NULL) {
496
ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n",
497
myargs[ARG_OUT].strvalue);
501
tabular_output = (Boolean)myargs[ARG_TABULAR].intvalue;
556
tabular_output = myargs[ARG_TABULAR].intvalue;
503
558
blast_program = strdup(myargs[ARG_PROGRAM].strvalue);
504
559
BlastProgram2Number(myargs[ARG_PROGRAM].strvalue, &program_number);
506
db_is_na = (program_number == blast_type_blastn ||
507
program_number == blast_type_tblastn ||
508
program_number == blast_type_tblastx);
509
query_is_na = (program_number == blast_type_blastn ||
510
program_number == blast_type_blastx ||
511
program_number == blast_type_rpstblastn ||
512
program_number == blast_type_tblastx);
514
rps_blast = (program_number == blast_type_rpsblast ||
515
program_number == blast_type_rpstblastn);
561
db_is_na = (program_number == eBlastTypeBlastn ||
562
program_number == eBlastTypeTblastn ||
563
program_number == eBlastTypeTblastx);
564
query_is_na = (program_number == eBlastTypeBlastn ||
565
program_number == eBlastTypeBlastx ||
566
program_number == eBlastTypeRpsTblastn ||
567
program_number == eBlastTypeTblastx);
569
rps_blast = (program_number == eBlastTypeRpsBlast ||
570
program_number == eBlastTypeRpsTblastn);
572
num_threads = myargs[ARG_THREADS].intvalue;
517
574
BLAST_InitDefaultOptions(program_number, &lookup_options,
518
575
&query_options, &word_options, &ext_options, &hit_options,
532
590
sfree(subject_file);
534
BLAST_GetQuerySeqLoc(infp2, db_is_na, 0, 0, 0, NULL, &subject_slp,
592
letters_read = BLAST_GetQuerySeqLoc(infp2, db_is_na, 0, 0, 0, 0, NULL, &subject_slp,
594
if (letters_read <= 0)
596
ErrPostEx(SEV_FATAL, 1, 0, "Bad return for BLAST_GetQuerySeqLoc\n");
536
599
FileClose(infp2);
601
if ((status = x_RestrictSeqLocToInterval(&subject_slp,
602
myargs[ARG_SUBJECT_LOC].strvalue)) != 0) {
603
ErrPostEx(SEV_FATAL, 1, 0,
604
"Subject location outside of the sequence range\n");
538
608
seq_src = MultiSeqSrcInit(subject_slp, program_number);
540
ctr = BLASTSeqSrcGetNumSeqs(seq_src);
542
610
int first_db_seq = 0;
543
611
int final_db_seq = 0;
565
633
BLAST_FillOptions(lookup_options, query_options, word_options,
566
634
ext_options, hit_options, score_options, eff_len_options,
567
635
psi_options, db_options, seq_src, rps_info);
568
if (!tabular_output) {
636
if (tabular_output) {
637
if ((outfp = FileOpen(myargs[ARG_OUT].strvalue, "w")) == NULL) {
638
ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n",
639
myargs[ARG_OUT].strvalue);
569
643
if ((status = BlastFormattingOptionsNew(program_number,
570
644
myargs[ARG_OUT].strvalue,
571
645
myargs[ARG_DESCRIPTIONS].intvalue,
572
646
myargs[ARG_ALIGNMENTS].intvalue,
573
647
myargs[ARG_FORMAT].intvalue, &format_options)) != 0)
575
format_options->html = (Boolean) myargs[ARG_HTML].intvalue;
578
dbname = BLASTSeqSrcGetName(seq_src);
650
if (myargs[ARG_HTML].intvalue) {
651
format_options->html = TRUE;
652
format_options->align_options += TXALIGN_HTML;
653
format_options->print_options += TXALIGN_HTML;
656
if (myargs[ARG_SHOWGI].intvalue) {
657
format_options->align_options += TXALIGN_SHOW_GI;
658
format_options->print_options += TXALIGN_SHOW_GI;
580
662
BLAST_PrintOutputHeader(format_options,
581
myargs[ARG_GREEDY].intvalue, dbname, !db_is_na);
663
(Boolean)myargs[ARG_GREEDY].intvalue, blast_program, dbname, !db_is_na);
666
if (myargs[ARG_UNGAPPED].intvalue != 0)
667
format_options->print_options += TXALIGN_SHOW_NO_OF_SEGS;
585
671
if ((infp = FileOpen(myargs[ARG_QUERY].strvalue, "r")) == NULL) {
587
673
myargs[ARG_QUERY].strvalue);
591
diagnostics = Blast_DiagnosticsInit();
593
translated_query = (program_number == blast_type_blastx ||
594
program_number == blast_type_tblastx);
677
if (num_threads > 1) {
678
diagnostics = Blast_DiagnosticsInitMT(Blast_MT_LOCKInit());
680
diagnostics = Blast_DiagnosticsInit();
683
translated_query = (program_number == eBlastTypeBlastx ||
684
program_number == eBlastTypeTblastx ||
685
program_number == eBlastTypeRpsTblastn);
688
believe_defline = TRUE;
597
690
/* Get the query (queries), loop if necessary. */
599
693
if ((Boolean)myargs[ARG_LCASE].intvalue) {
600
done = BLAST_GetQuerySeqLoc(infp, query_is_na,
601
myargs[ARG_STRAND].intvalue, 0, 0,
602
&lcase_mask, &query_slp, ctr,
694
letters_read = BLAST_GetQuerySeqLoc(infp, query_is_na,
695
(Uint1)myargs[ARG_STRAND].intvalue, 0, 0, 0,
696
&lcase_mask, &query_slp, &ctr,
697
&num_queries, believe_defline);
605
done = BLAST_GetQuerySeqLoc(infp, query_is_na,
606
myargs[ARG_STRAND].intvalue, 0, 0, NULL, &query_slp,
610
if (translated_query) {
611
BlastMaskLocDNAToProtein(&lcase_mask, query_slp);
699
letters_read = BLAST_GetQuerySeqLoc(infp, query_is_na,
700
(Uint1)myargs[ARG_STRAND].intvalue, 0, 0, 0, NULL, &query_slp,
701
&ctr, &num_queries, believe_defline);
704
if (letters_read == 0)
707
if (letters_read < 0)
709
ErrPostEx(SEV_FATAL, 1, 0, "BLAST_GetQuerySeqLoc returned an error\n");
713
if ((status = x_RestrictSeqLocToInterval(&query_slp,
714
myargs[ARG_QUERY_LOC].strvalue)) != 0) {
715
ErrPostEx(SEV_FATAL, 1, 0,
716
"Query location outside of the sequence range\n");
720
if (lcase_mask && translated_query) {
721
BlastMaskLoc* blast_maskloc_tmp = BlastMaskLocNew(NUM_FRAMES);
722
BlastMaskLocDNAToProtein(lcase_mask->seqloc_array[0],
723
blast_maskloc_tmp, 0, query_slp);
724
lcase_mask = BlastMaskLocFree(lcase_mask);
725
lcase_mask = blast_maskloc_tmp;
614
728
status = BLAST_SetUpQuery(program_number, query_slp,
615
729
query_options, &query_info, &query);
732
ErrPostEx(SEV_FATAL, 1, 0, "BLAST_SetUpQuery returned non-zero status: %d\n", status);
617
736
query->lcase_mask = lcase_mask;
619
739
if ((status = BLAST_ValidateOptions(program_number, ext_options,
620
score_options, lookup_options, hit_options,
621
&blast_message)) != 0) {
740
score_options, lookup_options, word_options,
741
hit_options, &blast_message)) != 0) {
622
742
Blast_MessagePost(blast_message);
645
770
lookup_segments, sbp, &lookup_wrap, rps_info);
647
772
if (!tabular_output) {
648
Int4 num_results = (rps_blast ? BLASTSeqSrcGetNumSeqs(seq_src) :
649
query_info->num_queries);
774
(rps_blast ? BLASTSeqSrcGetNumSeqs(seq_src) : num_queries);
650
775
/* Results in the collector stream should be sorted only for a
651
776
database search. The latter is true if and only if the sequence
652
777
source has non-zero database length. */
653
778
Boolean sort_on_read = (BLASTSeqSrcGetTotLen(seq_src) != 0);
780
if (num_threads > 1) {
781
lock = Blast_MT_LOCKInit();
655
Blast_HSPListCollectorInit(program_number, hit_options,
656
num_results, sort_on_read);
784
Blast_HSPListCollectorInitMT(program_number, hit_options,
785
num_results, sort_on_read, lock);
786
results_ptr = &results;
788
/* Print the header of tabular output. */
789
PrintTabularOutputHeader(myargs[ARG_DB].strvalue, NULL, query_slp,
790
blast_program, 0, FALSE, outfp);
658
792
hsp_stream = Blast_HSPListQueueInit();
659
793
tf_data = Blast_TabularFormatDataInit(program_number, hsp_stream,
660
794
seq_src, query, query_info, score_options, sbp,
661
795
eff_len_options, ext_options, hit_options, db_options,
662
796
query_slp, outfp);
798
if (tabular_output == 2) {
799
if (program_number == eBlastTypeBlastn) {
800
tf_data->format_options = eBlastTabularAddSequences;
803
"WARNING: Sequences printout in tabular output"
804
" allowed only for blastn\n");
808
tf_data->show_gi = (Boolean) myargs[ARG_SHOWGI].intvalue;
809
tf_data->show_accession = (Boolean) myargs[ARG_ACCESSION].intvalue;
663
811
/* Start the formatting thread */
812
if(NlmThreadsAvailable() &&
665
814
NlmThreadCreate(Blast_TabularFormatThread, (void*) tf_data))
666
815
== NULL_thread) {
668
817
"Cannot create thread for formatting tabular output\n");
674
BLAST_RPSSearchEngine(program_number, query, query_info,
823
if (!NlmThreadsAvailable() || num_threads == 1) {
824
if ((status=BLAST_SearchEngine(program_number, query, query_info,
675
825
seq_src, sbp, score_options, lookup_wrap,
676
826
word_options, ext_options, hit_options, eff_len_options,
677
827
psi_options, db_options, hsp_stream, diagnostics,
678
(tabular_output ? NULL : &results));
831
ErrPostEx(SEV_FATAL, 1, 0, "BLAST_SearchEngine failed\n");
680
BLAST_SearchEngine(program_number, query, query_info,
681
seq_src, sbp, score_options, lookup_wrap,
682
word_options, ext_options, hit_options, eff_len_options,
683
psi_options, db_options, hsp_stream, diagnostics,
684
(tabular_output ? NULL : &results));
835
TNlmThread* thread_array =
836
(TNlmThread*) calloc(num_threads, sizeof(TNlmThread));
837
BlastPrelimSearchThreadData* search_data = NULL;
838
void* join_status = NULL;
841
for (index = 0; index < num_threads; index++) {
843
BlastPrelimSearchThreadDataInit(program_number, query,
844
query_info, seq_src, lookup_wrap, score_options,
845
word_options, ext_options, hit_options, eff_len_options,
846
psi_options, db_options, sbp, diagnostics, hsp_stream);
847
thread_array[index] =
848
NlmThreadCreate(Blast_PrelimSearchThreadRun,
849
(void*) search_data);
851
for (index = 0; index < num_threads; index++) {
852
NlmThreadJoin(thread_array[index], &join_status);
855
MemFree(thread_array);
687
858
if (tabular_output) {
688
859
void* join_status = NULL;
860
BlastHSPStreamClose(hsp_stream);
689
861
NlmThreadJoin(format_thread, &join_status);
862
} else if (num_threads > 1) {
863
Blast_RunTracebackSearch(program_number, query, query_info, seq_src,
864
score_options, ext_options, hit_options, eff_len_options,
865
db_options, psi_options, sbp, hsp_stream, results_ptr);
692
868
hsp_stream = BlastHSPStreamFree(hsp_stream);
703
879
/* The following works because the ListNodes' data point to simple
704
880
double-integer structures */
705
lookup_segments = ListNodeFreeData(lookup_segments);
881
lookup_segments = BlastSeqLocFree(lookup_segments);
706
882
if (!tabular_output) {
707
/* Convert results to the SeqAlign form */
708
BLAST_ResultsToSeqAlign(program_number, results, query_slp, seq_src,
709
score_options->gapped_calculation, score_options->is_ooframe,
712
results = Blast_HSPResultsFree(results);
883
/* Get hold of a ReadDB data structure */
884
Blast_SummaryReturn* sum_returns=NULL;
885
ReadDBFILE* rdfp = NULL;
887
rdfp = readdb_new(dbname, !db_is_na);
888
/* Convert results to the SeqAlign form */
889
BLAST_ResultsToSeqAlign(program_number, results, query_slp, rdfp, subject_slp,
890
score_options->gapped_calculation, score_options->is_ooframe,
893
Blast_AdjustOffsetsInSeqAlign(seqalign, query_slp, subject_slp);
895
results = Blast_HSPResultsFree(results);
714
if (myargs[ARG_ASNOUT].strvalue) {
715
AsnIoPtr asnout = AsnIoOpen(myargs[ARG_ASNOUT].strvalue, (char*)"w");
716
GenericSeqAlignSetAsnWrite(seqalign, asnout);
717
asnout = AsnIoClose(asnout);
897
if (myargs[ARG_ASNOUT].strvalue) {
899
AsnIoOpen(myargs[ARG_ASNOUT].strvalue, (char*)"w");
900
GenericSeqAlignSetAsnWrite(seqalign, asnout);
901
asnout = AsnIoClose(asnout);
720
/* Format the results; note that seqalign and filter locations
722
status = BLAST_FormatResults(seqalign, dbname,
723
blast_program, query_info->num_queries, query_slp,
724
filter_loc, format_options, score_options->is_ooframe);
725
PrintOutputFooter(program_number, format_options, score_options, sbp,
726
lookup_options, word_options, ext_options,
727
hit_options, eff_len_options, query_info,
728
seq_src, diagnostics);
904
/* Format the results */
906
BLAST_FormatResults(seqalign, dbname,
907
blast_program, query_info->num_queries, query_slp,
908
filter_loc, format_options, score_options->is_ooframe, NULL, NULL);
910
seqalign = SeqAlignSetFree(seqalign);
912
Blast_SummaryReturnFill(program_number, score_options, sbp,
913
lookup_options, word_options, ext_options, hit_options,
914
eff_len_options, query_options, query_info, rdfp, subject_slp,
915
&diagnostics, &sum_returns);
916
Blast_PrintOutputFooter(program_number, format_options, rdfp, sum_returns);
917
sum_returns = Blast_SummaryReturnFree(sum_returns);
918
rdfp = readdb_destruct(rdfp);
729
919
} /* if not tabular output */
730
921
query = BlastSequenceBlkFree(query);
731
922
BlastMaskLocFree(filter_loc);
732
923
query_info = BlastQueryInfoFree(query_info);