281
376
while (number < bsp->length) {
282
377
retval = SeqPortRead(spp, buf, buf_length);
285
382
BSWrite(bsp->seq_data, buf, retval);
286
383
number += retval;
289
386
SeqPortFree(spp);
387
if (believe_defline) {
388
new_defline = StringSave(BioseqGetTitle(bsp_tmp));
390
title_length = StringLen(BioseqGetTitle(bsp_tmp));
391
SeqIdWrite(bsp_tmp->id, tmp, PRINTID_FASTA_LONG, LOCAL_BUFLEN);
392
id_length = StringLen(tmp);
393
title_length += id_length;
395
new_defline = (CharPtr) MemNew(title_length*sizeof(Char));
396
StringCpy(new_defline, tmp);
397
*(new_defline+id_length) = ' ';
398
StringCpy(new_defline+id_length+1, BioseqGetTitle(bsp_tmp));
399
*(new_defline+title_length-1) = NULLB;
402
bsp->descr = ValNodeAddStr(NULL, Seq_descr_title, new_defline);
403
bsp->id = ValNodeNew(NULL);
405
oid->str = (char*) Malloc(LOCAL_BUFLEN+1);
406
if (believe_defline) {
407
SeqIdWrite(bsp_tmp->id, oid->str, PRINTID_FASTA_LONG, LOCAL_BUFLEN);
409
sprintf(oid->str, "%d", id_num);
411
bsp->id->choice = SEQID_LOCAL;
412
bsp->id->data.ptrvalue = (Pointer) oid;
413
SeqMgrDeleteFromBioseqIndex(bsp_tmp);
414
BioseqUnlock(bsp_tmp);
291
416
ID1BioseqFetchDisable();
296
420
#define NUMARG (sizeof(myargs)/sizeof(myargs[0]))
298
452
static Args myargs [] = {
299
453
{ "First sequence",
300
NULL, NULL, NULL, FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL}, /* 0 */
454
NULL, NULL, NULL, FALSE, 'i', ARG_FILE_IN, 0.0, 0, NULL}, /* ARG_QUERY */
301
455
{ "Second sequence",
302
NULL, NULL, NULL, FALSE, 'j', ARG_FILE_IN, 0.0, 0, NULL}, /* 1 */
456
NULL, NULL, NULL, FALSE, 'j', ARG_FILE_IN, 0.0, 0, NULL}, /* ARG_SUBJECT */
303
457
{ "Program name: blastp, blastn, blastx, tblastn, tblastx. For blastx 1st sequence should be nucleotide, tblastn 2nd sequence nucleotide",
304
NULL, NULL, NULL, FALSE, 'p', ARG_STRING, 0.0, 0, NULL},/* 2 */
458
NULL, NULL, NULL, FALSE, 'p', ARG_STRING, 0.0, 0, NULL}, /* ARG_PROGRAM */
306
"T", NULL, NULL, FALSE, 'g', ARG_BOOLEAN, 0.0, 0, NULL},/* 3 */
460
"T", NULL, NULL, FALSE, 'g', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_GAPPED */
307
461
{ "alignment output file",
308
"stdout", NULL, NULL, FALSE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},/* 4 */
462
"stdout", NULL, NULL, FALSE, 'o', ARG_FILE_OUT, 0.0, 0, NULL}, /* ARG_OUT */
309
463
{ "theor. db size (zero is real size)",
310
"0", NULL, NULL, FALSE, 'd', ARG_STRING, 0.0, 0, NULL},/* 5 */
464
"0", NULL, NULL, FALSE, 'd', ARG_FLOAT, 0.0, 0, NULL}, /* ARG_DBSIZE */
311
465
{ "SeqAnnot output file",
312
NULL, NULL, NULL, TRUE, 'a', ARG_FILE_OUT, 0.0, 0, NULL},/* 6 */
313
{ "Cost to open a gap (zero invokes default behavior)",
314
"0", NULL, NULL, FALSE, 'G', ARG_INT, 0.0, 0, NULL},/* 7 */
315
{ "Cost to extend a gap (zero invokes default behavior)",
316
"0", NULL, NULL, FALSE, 'E', ARG_INT, 0.0, 0, NULL},/* 8 */
317
{ "X dropoff value for gapped alignment (in bits) (zero invokes default behavior)",
318
"0", NULL, NULL, FALSE, 'X', ARG_INT, 0.0, 0, NULL},/* 9 */
319
{ "Wordsize (zero invokes default behavior)",
320
"0", NULL, NULL, FALSE, 'W', ARG_INT, 0.0, 0, NULL},/* 10 */
466
NULL, NULL, NULL, TRUE, 'a', ARG_FILE_OUT, 0.0, 0, NULL}, /* ARG_ASNOUT */
467
{ "Cost to open a gap (-1 invokes default behavior; non-default values can result in unreliable statistics)",
468
"-1", NULL, NULL, FALSE, 'G', ARG_INT, 0.0, 0, NULL}, /* ARG_GAPOPEN */
469
{ "Cost to extend a gap (-1 invokes default behavior; non-default values can result in unreliable statistics)",
470
"-1", NULL, NULL, FALSE, 'E', ARG_INT, 0.0, 0, NULL}, /* ARG_GAPEXT */
471
{ "X dropoff value for gapped alignment (in bits) (zero invokes default "
472
"behavior)\n blastn 30, megablast 20, tblastx 0, all others 15",
473
"0", NULL, NULL, FALSE, 'X', ARG_INT, 0.0, 0, NULL}, /* ARG_XDROP */
474
{ "Word size, default if zero (blastn 11, megablast 28, "
476
"0", NULL, NULL, FALSE, 'W', ARG_INT, 0.0, 0, NULL}, /* ARG_WORDSIZE */
322
"BLOSUM62", NULL, NULL, FALSE, 'M', ARG_STRING, 0.0, 0, NULL},/* 11 */
478
"BLOSUM62", NULL, NULL, FALSE, 'M', ARG_STRING, 0.0, 0, NULL}, /* ARG_MATRIX */
323
479
{ "Penalty for a nucleotide mismatch (blastn only)",
324
"-3", NULL, NULL, FALSE, 'q', ARG_INT, 0.0, 0, NULL},/* 12 */
480
"-3", NULL, NULL, FALSE, 'q', ARG_INT, 0.0, 0, NULL}, /* ARG_MISMATCH */
325
481
{ "Reward for a nucleotide match (blastn only)",
326
"1", NULL, NULL, FALSE, 'r', ARG_INT, 0.0, 0, NULL},/* 13 */
482
"1", NULL, NULL, FALSE, 'r', ARG_INT, 0.0, 0, NULL}, /* ARG_MATCH */
327
483
{ "Filter query sequence (DUST with blastn, SEG with others)",
328
"T", NULL, NULL, FALSE, 'F', ARG_STRING, 0.0, 0, NULL},/* 14 */
484
"T", NULL, NULL, FALSE, 'F', ARG_STRING, 0.0, 0, NULL}, /* ARG_FILTER */
329
485
{ "Expectation value (E)",
330
"10.0", NULL, NULL, FALSE, 'e', ARG_FLOAT, 0.0, 0, NULL},/* 15 */
486
"10.0", NULL, NULL, FALSE, 'e', ARG_FLOAT, 0.0, 0, NULL}, /* ARG_EVALUE */
331
487
{ "Query strands to search against database (blastn only). 3 is both, 1 is top, 2 is bottom",
332
"3", NULL, NULL, FALSE, 'S', ARG_INT, 0.0, 0, NULL},/* 16 */
488
"3", NULL, NULL, FALSE, 'S', ARG_INT, 0.0, 0, NULL}, /* ARG_STRAND */
333
489
{ "Produce HTML output",
334
"F", NULL, NULL, FALSE, 'T', ARG_BOOLEAN, 0.0, 0, NULL},/* 17 */
490
"F", NULL, NULL, FALSE, 'T', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_HTML */
335
491
{ "Use Mega Blast for search",
336
"F", NULL, NULL, TRUE, 'm', ARG_BOOLEAN, 0.0, 0, NULL},/* 18 */
492
"F", NULL, NULL, TRUE, 'm', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_USEMEGABLAST */
337
493
{ "Effective length of the search space (use zero for the real size)",
338
"0", NULL, NULL, FALSE, 'Y', ARG_FLOAT, 0.0, 0, NULL}, /* 19 */
339
{ "Input sequences in the form of accession.version",
340
"F", NULL, NULL, FALSE, 'A', ARG_BOOLEAN, 0.0, 0, NULL}, /* 20 */
494
"0", NULL, NULL, FALSE, 'Y', ARG_FLOAT, 0.0, 0, NULL}, /* ARG_SEARCHSP */
341
495
{ "Length of the largest intron allowed in tblastn for linking HSPs",
342
"0", NULL, NULL, FALSE, 't', ARG_INT, 0.0, 0, NULL},/* 21 */
496
"0", NULL, NULL, FALSE, 't', ARG_INT, 0.0, 0, NULL}, /* ARG_INTRON */
343
497
{ "Location on first sequence",
344
NULL, NULL, NULL, TRUE, 'I', ARG_STRING, 0.0, 0, NULL},/* 22 */
498
NULL, NULL, NULL, TRUE, 'I', ARG_STRING, 0.0, 0, NULL}, /* ARG_LOC1 */
345
499
{ "Location on second sequence",
346
NULL, NULL, NULL, TRUE, 'J', ARG_STRING, 0.0, 0, NULL},/* 23 */
347
{ "Output format: 0 - traditional, 1 - tabulated",
348
"0", NULL, NULL, FALSE, 'D', ARG_INT, 0.0, 0, NULL}, /* 24 */
500
NULL, NULL, NULL, TRUE, 'J', ARG_STRING, 0.0, 0, NULL}, /* ARG_LOC2 */
501
{ "Output format: 0 - traditional, 1 - tabular",
502
"0", NULL, NULL, FALSE, 'D', ARG_INT, 0.0, 0, NULL}, /* ARG_FORMAT */
349
503
{ "Use lower case filtering for the query sequence",
350
"F", NULL, NULL, TRUE, 'U', ARG_BOOLEAN, 0.0, 0, NULL}/* 25 */
504
"F", NULL, NULL, TRUE, 'U', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_LCASE */
505
{ "Input sequences in the form of accession.version",
506
"F", NULL, NULL, FALSE, 'A', ARG_BOOLEAN, 0.0, 0, NULL}, /* ARG_ACCN */
507
{"Force use of old engine",
508
"T", NULL, NULL, TRUE, 'V', ARG_BOOLEAN, 0.0, 0, NULL} /* ARG_FORCE_OLD */
512
* Fetches sequences filling in either just the Bioseq's (if fetch from Entrez) or
513
* both the BioseqPtr's and the SeqEntryPtr's (if read from FASTA). The lcase_mask
514
* is also filled in with letters in query that were lower-case if myargs[ARG_LCASE].intvalue
517
* @param seq1_is_na the query sequence is DNA if true [in]
518
* @param seq2_is_na the subject sequence is DNA if true [in]
519
* @param query_bsp pointer to query BioseqPtr, to be filled in [out]
520
* @param subject_bsp pointer to subject BioseqPtr, to be filled in [out]
521
* @param sep pointer to query SeqEntryPtr, to be filled in [out]
522
* @param sep1 pointer to subject SeqEntryPtr, to be filled in [out]
523
* @param lcase_mask pointer to lower-case masking data to be filled in [out]
524
* @return TRUE on success, FALSE on failure.
528
BL2SEQ_GetSequences(Boolean seq1_is_na, Boolean seq2_is_na, BioseqPtr *query_bsp, BioseqPtr *subject_bsp,
529
SeqEntryPtr *sep, SeqEntryPtr *sep1, SeqLocPtr *lcase_mask, Boolean believe_query)
531
Boolean entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue;
532
char *query_accver = NULL, *subject_accver = NULL; /* Used if entrez_lookup. */
533
char *blast_inputfile = NULL, *blast_inputfile1 = NULL; /* Used if FASTA read. */
536
query_accver = myargs [ARG_QUERY].strvalue;
537
subject_accver = myargs [ARG_SUBJECT].strvalue;
539
blast_inputfile = myargs [ARG_QUERY].strvalue;
540
blast_inputfile1 = myargs [ARG_SUBJECT].strvalue;
544
*query_bsp = BioseqFromAccession(query_accver, 1, seq1_is_na, TRUE);
547
if ((infp = FileOpen(blast_inputfile, "r")) == NULL)
549
ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open input file %s\n", blast_inputfile);
552
if (myargs[ARG_LCASE].intvalue)
553
*sep = FastaToSeqEntryForDb(infp, seq1_is_na, NULL,
554
believe_query, NULL, NULL,
557
*sep = FastaToSeqEntryEx(infp, seq1_is_na, NULL, believe_query);
564
SeqEntryExplore(*sep, query_bsp, FindNuc);
566
SeqEntryExplore(*sep, query_bsp, FindProt);
570
if (*query_bsp == NULL) {
571
ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
577
BioseqFromAccession(subject_accver, 2, seq2_is_na, FALSE);
580
if ((infp1 = FileOpen(blast_inputfile1, "r")) == NULL)
582
ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open input file %s\n", blast_inputfile1);
585
*sep1 = FastaToSeqEntryEx(infp1, seq2_is_na, NULL, FALSE);
592
SeqEntryExplore(*sep1, subject_bsp, FindNuc);
594
SeqEntryExplore(*sep1, subject_bsp, FindProt);
599
if (*subject_bsp == NULL) {
600
ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
608
* Creates SeqLoc's from the given BioseqPtr's. if myargs for ARG_LOC1 or ARG_LOC2 are non-NULL,
609
* these are used in the creation of the SeqLoc's.
611
* @param bsp1 the query BioseqPtr [in]
612
* @param bsp2 the subject BioseqPtr [in]
613
* @param slp1 the query SeqLocPtr to be filled in [out]
614
* @param slp2 the subject SeqLocPtr to be filled in [out]
615
* @param strand_option specifies strand of slp1 [in]
616
* @return TRUE on success, FALSE on failure.
620
BL2SEQ_MakeSeqLoc(const BioseqPtr bsp1, const BioseqPtr bsp2, SeqLocPtr *slp1, SeqLocPtr *slp2, Uint1 strand_option)
622
const char* k_delimiters = " ,;";
629
location = myargs[ARG_LOC1].strvalue;
631
from = atoi(StringTokMT(location, k_delimiters, &location)) - 1;
632
to = atoi(location) - 1;
636
to = bsp1->length - 1;
637
to = MIN(to, bsp1->length - 1);
638
if (from >= bsp1->length) {
639
ErrPostEx(SEV_FATAL, 1, 0,
640
"Location outside of the first sequence range\n");
643
*slp1 = SeqLocIntNew(from, to, strand_option,
644
SeqIdFindBestAccession(bsp1->id));
645
} else if (strand_option != Seq_strand_both) {
646
*slp1 = SeqLocIntNew(0, bsp1->length-1, strand_option,
647
SeqIdFindBestAccession(bsp1->id));
649
ValNodeAddPointer(slp1, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBestAccession(bsp1->id)));
651
location = myargs[ARG_LOC2].strvalue;
653
from = atoi(StringTokMT(location, k_delimiters, &location)) - 1;
654
to = atoi(location) - 1;
658
to = bsp2->length - 1;
659
to = MIN(to, bsp2->length - 1);
660
if (from >= bsp2->length) {
661
ErrPostEx(SEV_FATAL, 1, 0,
662
"Location outside of the second sequence range\n");
665
*slp2 = SeqLocIntNew(from, to, Seq_strand_plus, SeqIdFindBestAccession(bsp2->id));
667
ValNodeAddPointer(slp2, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBestAccession(bsp2->id)));
673
* Initializes and sets the summary options based upon the command-line args.
675
* @param ret_options object to be initialized and filled in [out]
676
* @param program_number specifies blastn/blastp/blastx etc. [in]
677
* @return TRUE on success, FALSE on failure.
680
Bl2SEQ_SummaryOptionsSet(BLAST_SummaryOptions* *ret_options, EBlastProgramType program_number)
682
BLAST_SummaryOptions* options;
684
if (BLAST_SummaryOptionsInit(&options) != 0)
686
ErrPostEx(SEV_FATAL, 1, 0, "SummaryOptionsInit failed.");
690
options->hint = eNone;
692
switch (program_number) {
693
case eBlastTypeBlastn:
694
options->program = eBlastn;
696
case eBlastTypeBlastp:
697
options->program = eBlastp;
699
case eBlastTypeBlastx:
700
options->program = eBlastx;
702
case eBlastTypeTblastn:
703
options->program = eTblastn;
705
case eBlastTypeTblastx:
706
options->program = eTblastx;
709
ErrPostEx(SEV_FATAL, 1, 0, "Program_number (%ld) not valid in Bl2SEQ_SummaryOptionsSet", (long) program_number);
710
BLAST_SummaryOptionsFree(options);
714
options->cutoff_evalue = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue;
716
if (options->program == eBlastn)
718
options->nucleotide_mismatch = myargs[ARG_MISMATCH].intvalue;
719
options->nucleotide_match = myargs[ARG_MATCH].intvalue;
720
if (myargs[ARG_USEMEGABLAST].intvalue > 0)
721
options->use_megablast = TRUE;
724
if (myargs[ARG_GAPOPEN].intvalue != -1)
725
options->gap_open = myargs[ARG_GAPOPEN].intvalue;
727
if (myargs[ARG_GAPEXT].intvalue != -1)
728
options->gap_extend = myargs[ARG_GAPEXT].intvalue;
730
options->strand = myargs[ARG_STRAND].intvalue;
732
if (myargs[ARG_WORDSIZE].intvalue != 0)
733
options->word_size = myargs[ARG_WORDSIZE].intvalue;
735
if (myargs[ARG_MATRIX].strvalue)
736
options->matrix = StringSave(myargs[ARG_MATRIX].strvalue);
738
if (myargs[ARG_FILTER].strvalue)
739
options->filter_string = StringSave(myargs[ARG_FILTER].strvalue);
741
if (myargs[ARG_XDROP].intvalue != 0)
743
options->gap_x_dropoff = myargs[ARG_XDROP].intvalue;
746
*ret_options = options;
754
BioseqPtr query_bsp=NULL, subject_bsp=NULL;
755
BioseqPtr bsp1=NULL, bsp2=NULL;
756
BioseqPtr fake_bsp=NULL, fake_subject_bsp=NULL;
757
BlastFormattingOptions* format_options;
758
BlastMaskLoc* blast_mask_head = NULL;
759
BLAST_SummaryOptions* options=NULL;
760
Blast_SummaryReturn* extra_returns=NULL;
761
Boolean believe_query= FALSE;
762
Boolean seq1_is_na, seq2_is_na; /* seq1/2 is DNA if TRUE. */
763
Boolean seqannot_output; /* SeqAlign will be output. */
764
Boolean entrez_lookup; /* QUery/subject fetched from Entrez. */
765
Boolean mask_at_hash=FALSE; /* masking only on lookup table if TRUE. */
767
EBlastProgramType program_number;
768
Int2 status; /* return value */
769
Int4 align_view=0; /* Used for formatting */
770
SeqAlignPtr seqalign=NULL;
771
SeqEntryPtr sep=NULL, sep1=NULL;
772
SeqLocPtr slp1, slp2; /* Used for actual search. */
773
SeqLocPtr filter_loc=NULL; /* Location of regions filtered (returned by engine) */
774
SeqLocPtr filter_loc_head=NULL; /* Location of regions filtered (returned by engine) */
775
SeqLocPtr lcase_mask=NULL; /* For lower-case masking info from query FASTA. */
776
Uint1 strand_option = 0; /* FIXME */
778
strand_option = (Uint1) myargs[ARG_STRAND].intvalue;
780
entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue;
781
seqannot_output = (myargs[ARG_ASNOUT].strvalue != NULL);
782
believe_query = (seqannot_output || entrez_lookup);
783
if (myargs[ARG_FORMAT].intvalue != 0)
784
align_view = 9; /* Means tabular output. */
786
BlastProgram2Number(myargs[ARG_PROGRAM].strvalue, &program_number);
788
seq1_is_na = (program_number == eBlastTypeBlastn ||
789
program_number == eBlastTypeBlastx ||
790
program_number == eBlastTypeRpsTblastn ||
791
program_number == eBlastTypeTblastx);
793
seq2_is_na = (program_number == eBlastTypeBlastn ||
794
program_number == eBlastTypeTblastn ||
795
program_number == eBlastTypeTblastx);
797
if (BL2SEQ_GetSequences(seq1_is_na, seq2_is_na, &query_bsp, &subject_bsp, &sep, &sep1,
798
&lcase_mask, believe_query) == FALSE)
800
ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to get sequences");
805
fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL);
807
fake_subject_bsp = BioseqNew();
808
fake_subject_bsp->descr = subject_bsp->descr;
809
fake_subject_bsp->repr = subject_bsp->repr;
810
fake_subject_bsp->mol = subject_bsp->mol;
811
fake_subject_bsp->length = subject_bsp->length;
812
fake_subject_bsp->seq_data = subject_bsp->seq_data;
813
fake_subject_bsp->seq_data_type = subject_bsp->seq_data_type;
814
dbtagptr = DbtagNew();
815
dbtagptr->db = StringSave("BL_ORD_ID");
816
dbtagptr->tag = ObjectIdNew();
817
dbtagptr->tag->str = StringSave(BioseqGetTitle(subject_bsp));
818
ValNodeAddPointer(&fake_subject_bsp->id, SEQID_GENERAL, dbtagptr);
825
bsp2 = fake_subject_bsp;
827
if (BL2SEQ_MakeSeqLoc(bsp1, bsp2, &slp1, &slp2, strand_option) == FALSE)
830
if (Bl2SEQ_SummaryOptionsSet(&options, program_number) == FALSE)
833
status = BLAST_TwoSeqLocSets(options, slp1, slp2, lcase_mask, &seqalign, &filter_loc_head, &mask_at_hash, &extra_returns);
836
ErrPostEx(SEV_FATAL, 1, 0, "BLAST_TwoSeqLocSets failed");
840
if (myargs[ARG_ASNOUT].strvalue) {
842
AsnIoOpen(myargs[ARG_ASNOUT].strvalue, (char*)"w");
843
GenericSeqAlignSetAsnWrite(seqalign, asnout);
844
asnout = AsnIoClose(asnout);
847
if ((status = BlastFormattingOptionsNew(program_number, myargs[ARG_OUT].strvalue, 0, 1,
848
align_view, &format_options)) != 0)
851
if (myargs[ARG_HTML].intvalue) {
852
format_options->html = TRUE;
853
format_options->align_options += TXALIGN_HTML;
854
format_options->print_options += TXALIGN_HTML;
861
filter_loc = filter_loc_head;
865
filter_loc = filter_loc->next;
867
filter_loc = filter_loc_head;
868
blast_mask_head = BlastMaskLocNew(total);
871
blast_mask_head->seqloc_array[i] = BlastSeqLocFromSeqLoc(filter_loc->data.ptrvalue);
872
filter_loc = filter_loc->next;
877
blast_mask_head = NULL;
879
/* Format the results */
880
status = BLAST_FormatResults(seqalign, NULL, myargs[ARG_PROGRAM].strvalue, 1, slp1,
881
blast_mask_head, format_options, FALSE, NULL, NULL);
883
status = Blast_PrintOutputFooter(program_number, format_options, NULL, extra_returns);
886
blast_mask_head = BlastMaskLocFree(blast_mask_head);
887
format_options = BlastFormattingOptionsFree(format_options);
888
extra_returns = Blast_SummaryReturnFree(extra_returns);
891
BioseqFree(query_bsp);
892
BioseqFree(subject_bsp);
898
options = BLAST_SummaryOptionsFree(options);
899
seqalign = SeqAlignFree(seqalign);
900
slp1 = SeqLocSetFree(slp1);
901
slp2 = SeqLocSetFree(slp2);
902
filter_loc = filter_loc_head;
905
SeqLocSetFree((SeqLocPtr) filter_loc->data.ptrvalue);
906
filter_loc->data.ptrvalue;
907
filter_loc = filter_loc->next;
909
ValNodeFree(filter_loc_head);
911
fake_bsp = BlastDeleteFakeBioseq(fake_bsp);
358
BioseqPtr fake_bsp, fake_subject_bsp, query_bsp, subject_bsp;
922
BioseqPtr fake_bsp = NULL, fake_subject_bsp = NULL, query_bsp = NULL,
359
924
BioseqPtr bsp1, bsp2;
360
925
BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL;
361
BLAST_OptionsBlkPtr options;
926
BLAST_OptionsBlkPtr options=NULL;
362
927
Boolean seq1_is_na, seq2_is_na;
363
CharPtr ret_buffer=NULL, params_buffer=NULL;
928
CharPtr params_buffer=NULL;
364
929
DbtagPtr dbtagptr;
365
930
Uint1 align_type;
366
931
Uint4 align_options;
367
932
SeqAlignPtr seqalign;
368
933
SeqAnnotPtr seqannot;
369
934
SeqEntryPtr sep = NULL, sep1 = NULL;
370
TxDfDbInfoPtr dbinfo=NULL, dbinfo_head;
371
CharPtr blast_inputfile, blast_inputfile1, program_name, blast_outputfile;
372
FILE *infp, *infp1, *outfp;
935
CharPtr program_name, blast_outputfile;
373
937
ValNodePtr mask_loc, mask_loc_start, vnp, other_returns=NULL, error_returns=NULL;
374
938
BLAST_MatrixPtr matrix;
375
939
Int4Ptr PNTR txmatrix;
376
940
int (LIBCALLBACK *handle_results)PROTO((VoidPtr search)) = NULL;
377
941
Boolean entrez_lookup = FALSE;
378
CharPtr query_accver = NULL, subject_accver = NULL;
379
const char *dummystr;
381
if (! GetArgs ("bl2seq", NUMARG, myargs))
386
UseLocalAsnloadDataAndErrMsg ();
388
if (! SeqEntryLoad())
391
ErrSetMessageLevel(SEV_WARNING);
392
entrez_lookup = (Boolean) myargs[20].intvalue;
393
#ifndef NCBI_ENTREZ_CLIENT
395
ErrPostEx(SEV_WARNING, 0, 0, "Entrez client interface unavailable, -A option cannot be used");
400
query_accver = myargs [0].strvalue;
401
subject_accver = myargs [1].strvalue;
403
blast_inputfile = myargs [0].strvalue;
404
blast_inputfile1 = myargs [1].strvalue;
406
blast_outputfile = myargs [4].strvalue;
408
program_name = StringSave(myargs[2].strvalue);
942
Boolean html, seqannot_output, believe_query;
943
Uint1 tabular_output;
944
Boolean gapped_calculation;
946
entrez_lookup = (Boolean) myargs[ARG_ACCN].intvalue;
947
html = (Boolean) myargs[ARG_HTML].intvalue;
948
seqannot_output = (myargs[ARG_ASNOUT].strvalue != NULL);
950
blast_outputfile = myargs [ARG_OUT].strvalue;
952
program_name = StringSave(myargs[ARG_PROGRAM].strvalue);
409
953
if (StringCmp(program_name, "blastn") &&
410
954
StringCmp(program_name, "blastp") &&
411
955
StringCmp(program_name, "blastx") &&
412
956
StringCmp(program_name, "tblastn") &&
413
957
StringCmp(program_name, "tblastx")) {
414
ErrPostEx(SEV_FATAL, 0, 0, "Program name must be blastn, blastp, blastx, tblastn or tblastx\n");
958
ErrPostEx(SEV_FATAL, 1, 0, "Program name must be blastn, blastp, blastx, tblastn or tblastx\n");
419
962
align_type = BlastGetTypes(program_name, &seq1_is_na, &seq2_is_na);
421
if (!entrez_lookup && (infp = FileOpen(blast_inputfile, "r")) == NULL)
423
ErrPostEx(SEV_FATAL, 0, 0, "blast: Unable to open input file %s\n", blast_inputfile);
427
if (!entrez_lookup && (infp1 = FileOpen(blast_inputfile1, "r")) == NULL)
429
ErrPostEx(SEV_FATAL, 0, 0, "blast: Unable to open input file %s\n", blast_inputfile1);
433
964
if ((outfp = FileOpen(blast_outputfile, "w")) == NULL)
435
ErrPostEx(SEV_FATAL, 0, 0, "blast: Unable to open output file %s\n", blast_outputfile);
966
ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", blast_outputfile);
439
970
global_fp = outfp;
440
options = BLASTOptionNew(program_name, (Boolean) myargs[3].intvalue);
441
options->is_megablast_search = (Boolean) myargs[18].intvalue;
444
query_bsp = BioseqFromAccession(query_accver, 1, seq1_is_na);
446
if (myargs[25].intvalue)
447
sep = FastaToSeqEntryForDb(infp, seq1_is_na, NULL, FALSE,
449
&options->query_lcase_mask);
451
sep = FastaToSeqEntry(infp, seq1_is_na);
456
SeqEntryExplore(sep, &query_bsp, FindNuc);
458
SeqEntryExplore(sep, &query_bsp, FindProt);
462
if (query_bsp == NULL) {
463
ErrPostEx(SEV_FATAL, 0, 0, "Unable to obtain bioseq\n");
467
if (myargs[24].intvalue == 0)
971
gapped_calculation = (Boolean) myargs[ARG_GAPPED].intvalue;
972
believe_query = (seqannot_output || entrez_lookup);
974
options = BLASTOptionNewEx(program_name, gapped_calculation,
975
(Boolean) myargs[ARG_USEMEGABLAST].intvalue);
977
if (BL2SEQ_GetSequences(seq1_is_na, seq2_is_na, &query_bsp, &subject_bsp, &sep, &sep1,
978
&(options->query_lcase_mask), believe_query) == FALSE)
980
ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to get sequences");
468
985
fake_bsp = BlastMakeFakeBioseq(query_bsp, NULL);
470
subject_bsp = BioseqFromAccession(subject_accver, 2, seq2_is_na);
472
if (myargs[6].strvalue != NULL || myargs[17].intvalue != 0
473
|| myargs[24].intvalue != 0)
474
sep1 = FastaToSeqEntry(infp1, seq2_is_na);
476
sep1 = FastaToSeqEntryEx(infp1, seq2_is_na, NULL, FALSE);
481
SeqEntryExplore(sep1, &subject_bsp, FindNuc);
483
SeqEntryExplore(sep1, &subject_bsp, FindProt);
488
if (subject_bsp == NULL) {
489
ErrPostEx(SEV_FATAL, 0, 0, "Unable to obtain bioseq\n");
493
if (myargs[6].strvalue == NULL && myargs[17].intvalue == 0 &&
494
myargs[24].intvalue == 0) {
495
if (!entrez_lookup) {
496
fake_subject_bsp = BioseqNew();
497
fake_subject_bsp->descr = subject_bsp->descr;
498
fake_subject_bsp->repr = subject_bsp->repr;
499
fake_subject_bsp->mol = subject_bsp->mol;
500
fake_subject_bsp->length = subject_bsp->length;
501
fake_subject_bsp->seq_data = subject_bsp->seq_data;
502
fake_subject_bsp->seq_data_type = subject_bsp->seq_data_type;
503
dbtagptr = DbtagNew();
504
dbtagptr->db = StringSave("BL_ORD_ID");
505
dbtagptr->tag = ObjectIdNew();
506
dbtagptr->tag->id = 0;
507
ValNodeAddPointer(&fake_subject_bsp->id, SEQID_GENERAL, dbtagptr);
510
SeqIdWrite(subject_bsp->id, tmp, PRINTID_FASTA_LONG, 255);
511
fake_subject_bsp = BlastMakeFakeBioseq(subject_bsp, tmp);
514
if (options->is_megablast_search) {
515
options->gap_open = options->gap_extend = 0;
516
options->wordsize = 28;
517
options->block_width = 0;
520
if (myargs[19].floatvalue)
521
options->searchsp_eff = (Nlm_FloatHi) myargs[19].floatvalue;
524
options->filter_string = StringSave(myargs[14].strvalue);
525
options->expect_value = (Nlm_FloatHi) myargs [15].floatvalue;
987
fake_subject_bsp = BioseqNew();
988
fake_subject_bsp->descr = subject_bsp->descr;
989
fake_subject_bsp->repr = subject_bsp->repr;
990
fake_subject_bsp->mol = subject_bsp->mol;
991
fake_subject_bsp->length = subject_bsp->length;
992
fake_subject_bsp->seq_data = subject_bsp->seq_data;
993
fake_subject_bsp->seq_data_type = subject_bsp->seq_data_type;
994
dbtagptr = DbtagNew();
995
dbtagptr->db = StringSave("BL_ORD_ID");
996
dbtagptr->tag = ObjectIdNew();
997
dbtagptr->tag->str = StringSave(BioseqGetTitle(subject_bsp));
998
ValNodeAddPointer(&fake_subject_bsp->id, SEQID_GENERAL, dbtagptr);
1000
tabular_output = (Uint1) myargs[ARG_FORMAT].intvalue;
1003
if (myargs[ARG_SEARCHSP].floatvalue)
1004
options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue;
1007
options->filter_string = StringSave(myargs[ARG_FILTER].strvalue);
1008
options->expect_value = (Nlm_FloatHi) myargs [ARG_EVALUE].floatvalue;
527
1010
if (StringICmp("blastn", program_name) == 0)
529
options->penalty = myargs[12].intvalue;
530
options->reward = myargs[13].intvalue;
1012
options->penalty = myargs[ARG_MISMATCH].intvalue;
1013
options->reward = myargs[ARG_MATCH].intvalue;
533
options->db_length = StringToInt8(myargs[5].strvalue, &dummystr);
1016
options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue;
535
1018
options->discontinuous = FALSE;
537
if (myargs[7].intvalue != 0)
538
options->gap_open = myargs[7].intvalue;
539
if (myargs[8].intvalue != 0)
540
options->gap_extend = myargs[8].intvalue;
541
if (myargs[9].intvalue != 0)
1020
if (myargs[ARG_XDROP].intvalue != 0)
543
options->gap_x_dropoff = myargs[9].intvalue;
1022
options->gap_x_dropoff = myargs[ARG_XDROP].intvalue;
545
if (myargs[10].intvalue != 0)
546
options->wordsize = (Int2) myargs[10].intvalue;
1024
if (myargs[ARG_WORDSIZE].intvalue != 0)
1025
options->wordsize = (Int2) myargs[ARG_WORDSIZE].intvalue;
548
1027
if (options->is_megablast_search) {
549
options->cutoff_s2 = options->wordsize;
550
options->wordsize += 4;
551
options->cutoff_s = options->wordsize;
1028
options->cutoff_s2 = options->wordsize*options->reward;
553
MemFree(options->matrix);
554
options->matrix = myargs[11].strvalue;
556
options->strand_option = myargs[16].intvalue;
558
/* Input longest intron length is in nucleotide scale; in the lower level
559
code it will be used in protein scale */
560
if (myargs[21].intvalue > 0)
561
options->longest_intron = MAX(myargs[21].intvalue, MAX_INTRON_LENGTH);
562
if (myargs[24].intvalue == 1) {
563
options->output = (VoidPtr) outfp;
564
if (options->is_megablast_search)
565
handle_results = MegaBlastPrintAlignInfo;
567
handle_results = BlastPrintAlignInfo;
569
handle_results = NULL;
571
if (myargs[6].strvalue || myargs[17].intvalue ||
1030
options->matrix = MemFree(options->matrix);
1031
BLASTOptionSetGapParams(options, myargs[ARG_MATRIX].strvalue, 0, 0);
1033
if (myargs[ARG_GAPOPEN].intvalue != -1)
1034
options->gap_open = myargs[ARG_GAPOPEN].intvalue;
1035
if (myargs[ARG_GAPEXT].intvalue != -1)
1036
options->gap_extend = myargs[ARG_GAPEXT].intvalue;
1038
options->strand_option = myargs[ARG_STRAND].intvalue;
1040
/* Input longest intron length is in nucleotide scale; in the lower
1041
level code it will be used in protein scale */
1042
if (myargs[ARG_INTRON].intvalue > 0)
1043
options->longest_intron =
1044
MAX(myargs[ARG_INTRON].intvalue, MAX_INTRON_LENGTH);
1046
if (believe_query) {
573
1047
bsp1 = query_bsp;
576
1049
bsp1 = fake_bsp;
577
bsp2 = fake_subject_bsp;
580
if (!myargs[22].strvalue && !myargs[23].strvalue) {
581
seqalign = BlastTwoSequencesWithCallback(bsp1, bsp2, program_name, options, &other_returns, &error_returns, handle_results);
1052
bsp2 = fake_subject_bsp;
1054
if (!myargs[ARG_LOC1].strvalue && !myargs[ARG_LOC2].strvalue) {
1055
seqalign = BlastTwoSequencesWithCallback(bsp1, bsp2, program_name,
1056
options, &other_returns, &error_returns, handle_results);
583
/* Location(s) are provided */
584
CharPtr delimiters = " ,;";
587
SeqLocPtr slp1 = NULL, slp2 = NULL;
588
location = myargs[22].strvalue;
590
from = atoi(StringTokMT(location, delimiters, &location)) - 1;
591
to = atoi(location) - 1;
595
to = bsp1->length - 1;
596
to = MIN(to, bsp1->length - 1);
597
if (from >= bsp1->length) {
598
ErrPostEx(SEV_FATAL, 0, 0,
599
"Location outside of the first sequence range\n");
602
slp1 = SeqLocIntNew(from, to, options->strand_option,
603
SeqIdFindBestAccession(bsp1->id));
605
ValNodeAddPointer(&slp1, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBestAccession(bsp1->id)));
607
location = myargs[23].strvalue;
609
from = atoi(StringTokMT(location, delimiters, &location)) - 1;
610
to = atoi(location) - 1;
614
to = bsp2->length - 1;
615
to = MIN(to, bsp2->length - 1);
616
if (from >= bsp2->length) {
617
ErrPostEx(SEV_FATAL, 0, 0,
618
"Location outside of the second sequence range\n");
621
slp2 = SeqLocIntNew(from, to, Seq_strand_plus, SeqIdFindBestAccession(bsp2->id));
623
ValNodeAddPointer(&slp2, SEQLOC_WHOLE, SeqIdDup(SeqIdFindBestAccession(bsp2->id)));
625
seqalign = BlastTwoSequencesByLocWithCallback(slp1, slp2, program_name, options, &other_returns, &error_returns, handle_results);
1058
SeqLocPtr slp1=NULL, slp2=NULL;
1059
if (BL2SEQ_MakeSeqLoc(bsp1, bsp2, &slp1, &slp2, options->strand_option) == FALSE)
1061
seqalign = BlastTwoSequencesByLocWithCallback(slp1, slp2, program_name, options, &other_returns, &error_returns, handle_results, NULL);
626
1062
SeqLocFree(slp1);
627
1063
SeqLocFree(slp2);