4519
#define BLAST_ERROR_BULEN 50
4521
The following functions fill a the Error user string with
4522
text to identify BLAST and the entry being worked on.
4523
The SeqIdPtr is used to make a FASTA id, which is appended
4526
A Uint1 is returned, which allows Nlm_ErrUserDelete to delete
4527
this error string when it's done.
4531
BlastSetUserErrorString(CharPtr string, SeqIdPtr sip, Boolean use_id)
4535
Char buffer[2*BLAST_ERROR_BULEN+1], textid[BLAST_ERROR_BULEN+1];
4536
CharPtr buf_start, ptr, title;
4537
Int2 length=0, index;
4541
ptr = buf_start = &buffer[0];
4544
StringNCpy_0(ptr, string, BLAST_ERROR_BULEN);
4548
bsp = BioseqLockById(sip);
4554
title = BioseqGetTitle(bsp);
4559
length = StringLen(string);
4560
if (length > BLAST_ERROR_BULEN)
4561
length = BLAST_ERROR_BULEN;
4568
SeqIdWrite(sip, textid, PRINTID_FASTA_LONG, BLAST_ERROR_BULEN-1);
4569
StringNCpy_0(ptr, textid, BLAST_ERROR_BULEN-1);
4573
for (index=0; index<BLAST_ERROR_BULEN-1; index++)
4575
if (title[index] == NULLB || title[index] == ' ')
4579
*ptr = title[index];
4585
StringCpy(ptr+StringLen(ptr), ":");
4587
NlmMutexLockEx(&err_message_mutex);
4588
retval = Nlm_ErrUserInstall (buf_start, 0);
4589
NlmMutexUnlock(err_message_mutex);
4595
BlastDeleteUserErrorString(Uint1 err_id)
4598
NlmMutexLockEx(&err_message_mutex);
4599
Nlm_ErrUserDelete(err_id);
4600
NlmMutexUnlock(err_message_mutex);
4605
BlastBioseqGetNumIdentical(BioseqPtr q_bsp, BioseqPtr s_bsp, Int4 q_start,
4606
Int4 s_start, Int4 length,
4607
Uint1 q_strand, Uint1 s_strand)
4609
SeqLocPtr q_slp, s_slp;
4610
SeqPortPtr q_spp, s_spp;
4614
if (!q_bsp || !s_bsp)
4617
q_slp = SeqLocIntNew(q_start, q_start+length-1, q_strand, q_bsp->id);
4618
s_slp = SeqLocIntNew(s_start, s_start+length-1, s_strand, s_bsp->id);
4619
if (ISA_na(q_bsp->mol))
4620
q_spp = SeqPortNewByLoc(q_slp, Seq_code_ncbi4na);
4622
q_spp = SeqPortNewByLoc(q_slp, Seq_code_ncbistdaa);
4623
if (ISA_na(s_bsp->mol))
4624
s_spp = SeqPortNewByLoc(s_slp, Seq_code_ncbi4na);
4626
s_spp = SeqPortNewByLoc(s_slp, Seq_code_ncbistdaa);
4628
for (i=0; i<length; i++) {
4629
while ((q_res = SeqPortGetResidue(q_spp)) != SEQPORT_EOF &&
4630
!IS_residue(q_res));
4631
while ((s_res = SeqPortGetResidue(s_spp)) != SEQPORT_EOF &&
4632
!IS_residue(s_res));
4633
if (q_res == SEQPORT_EOF || s_res == SEQPORT_EOF)
4635
else if (q_res == s_res)
4648
4529
BlastGetNumIdentical(Uint1Ptr query, Uint1Ptr subject, Int4 q_start,
4649
4530
Int4 s_start, Int4 length, Boolean reverse)
4674
Function to print results in tab-delimited format, given a SeqAlign list.
4675
q_shift and s_shift are the offsets in query and subject in case of a
4678
void BlastPrintTabulatedResults(SeqAlignPtr seqalign, BioseqPtr query_bsp,
4679
SeqLocPtr query_slp, Int4 num_alignments,
4680
CharPtr blast_program, Boolean is_ungapped,
4681
Boolean believe_query, Int4 q_shift,
4682
Int4 s_shift, FILE *fp,
4683
Boolean print_query_info)
4685
BlastPrintTabulatedResultsEx(seqalign, query_bsp, query_slp, num_alignments,
4686
blast_program, is_ungapped, believe_query,
4687
q_shift, s_shift, fp, NULL, print_query_info);
4690
void BlastPrintTabulatedResultsEx(SeqAlignPtr seqalign, BioseqPtr query_bsp,
4691
SeqLocPtr query_slp, Int4 num_alignments,
4692
CharPtr blast_program, Boolean is_ungapped,
4693
Boolean believe_query, Int4 q_shift,
4694
Int4 s_shift, FILE *fp,
4695
int *num_formatted, Boolean print_query_info)
4697
BlastPrintTabularResults(seqalign, query_bsp, query_slp, num_alignments,
4698
blast_program, is_ungapped, FALSE, believe_query,
4699
q_shift, s_shift, fp, num_formatted, print_query_info);
4702
void BlastPrintTabularResults(SeqAlignPtr seqalign, BioseqPtr query_bsp,
4703
SeqLocPtr query_slp, Int4 num_alignments, CharPtr blast_program,
4704
Boolean is_ungapped, Boolean is_ooframe, Boolean believe_query,
4705
Int4 q_shift, Int4 s_shift, FILE *fp, int *num_formatted,
4706
Boolean print_query_info)
4708
SeqAlignPtr sap, sap_tmp = NULL;
4709
FloatHi perc_ident, bit_score, evalue;
4710
Int4 numseg, num_gap_opens, num_mismatches, num_ident, score;
4711
Int4 number, align_length, index, i;
4712
Int4 q_start, q_end, s_start, s_end;
4713
Char bit_score_buff[10];
4715
Boolean is_translated;
4716
SeqIdPtr query_id, old_query_id = NULL, subject_id, old_subject_id = NULL;
4717
BioseqPtr subject_bsp=NULL;
4718
Char query_buffer[BUFFER_LENGTH+1], subject_buffer[BUFFER_LENGTH+1];
4720
StdSegPtr ssp = NULL;
4721
DenseDiagPtr ddp = NULL;
4722
AlignSumPtr asp = NULL;
4723
CharPtr defline, title;
4725
Int4 alignments_count;
4727
is_translated = (StringCmp(blast_program, "blastn") &&
4728
StringCmp(blast_program, "blastp"));
4730
if (is_translated) {
4731
asp = MemNew(sizeof(AlignSum));
4732
asp->matrix = load_default_matrix();
4734
asp->ooframe = is_ooframe;
4738
sap_tmp = SeqAlignNew();
4742
query_id = query_bsp->id;
4744
/* Evalue buffer is dynamically allocated to avoid compiler warnings
4745
in calls to ScoreAndEvalueToBuffers. */
4746
eval_buff = Malloc(10);
4748
for (sap = seqalign; sap; sap = sap->next) {
4750
query_id = TxGetQueryIdFromSeqAlign(sap);
4751
if (SeqIdComp(query_id, old_query_id) != SIC_YES) {
4752
if (old_query_id && num_formatted)
4754
alignments_count = num_alignments;
4755
/* New query: find the corresponding SeqLoc */
4756
while (slp && SeqIdComp(query_id, SeqLocId(slp)) != SIC_YES)
4759
query_id = old_query_id = SeqLocId(slp);
4760
/* Print new query information */
4761
if (print_query_info)
4762
PrintTabularOutputHeader(NULL, NULL, slp, NULL, 0,
4764
} else if (query_bsp)
4765
old_query_id = query_bsp->id;
4766
defline = (CharPtr) Malloc(BUFFER_LENGTH+1);
4767
SeqIdWrite(query_id, defline, PRINTID_FASTA_LONG, BUFFER_LENGTH);
4768
if (StringNCmp(defline, "lcl|", 4))
4769
StringCpy(query_buffer, defline);
4770
else if (!believe_query) {
4772
BioseqUnlock(query_bsp);
4773
query_bsp = BioseqLockById(query_id);
4775
if ((title = StringSave(BioseqGetTitle(query_bsp))) != NULL) {
4776
defline = MemFree(defline);
4777
defline = StringTokMT(title, " ", &title);
4778
StringNCpy_0(query_buffer, defline, BUFFER_LENGTH);
4779
defline = MemFree(defline);
4781
StringCpy(query_buffer, defline+4);
4782
defline = MemFree(defline);
4784
StringCpy(query_buffer, defline+4);
4786
query_id = old_query_id;
4788
subject_id = TxGetSubjectIdFromSeqAlign(sap);
4790
if (SeqIdComp(subject_id, old_subject_id) != SIC_YES) {
4791
/* New subject sequence has been found in the seqalign list */
4792
if (--alignments_count < 0)
4794
BioseqUnlock(subject_bsp);
4795
subject_bsp = BioseqLockById(subject_id);
4797
if (!subject_bsp || !subject_bsp->id)
4799
if (subject_bsp->id->choice != SEQID_GENERAL ||
4800
StringCmp(((DbtagPtr)subject_id->data.ptrvalue)->db, "BL_ORD_ID")) {
4801
defline = (CharPtr) Malloc(BUFFER_LENGTH+1);
4802
SeqIdWrite(subject_bsp->id, defline, PRINTID_FASTA_LONG, BUFFER_LENGTH);
4803
if (StringNCmp(defline, "lcl|", 4))
4804
StringCpy(subject_buffer, defline);
4806
StringCpy(subject_buffer, defline+4);
4808
defline = StringSave(BioseqGetTitle(subject_bsp));
4809
defline = StringTokMT(defline, " \t", &title);
4810
StringCpy(subject_buffer, defline);
4812
defline = MemFree(defline);
4820
GetScoreAndEvalue(sap, &score, &bit_score, &evalue, &number);
4822
/* Do not allow knocking off digit in evalue buffer, so parsers are
4824
ScoreAndEvalueToBuffers(bit_score, evalue,
4825
bit_score_buff, &eval_buff, 0);
4827
/* Loop on segments within this seqalign (in ungapped case) */
4829
if (sap->segtype == SAS_DENSEG) {
4830
Boolean get_num_ident = TRUE;
4831
dsp = (DenseSegPtr) sap->segs;
4832
numseg = dsp->numseg;
4833
/* Query Bioseq is needed for calculating number of identities.
4834
NB: even if number of identities is already filled in the
4835
seqalign score list, that is not enough here, because we need to
4836
know number of identities in each segment in order to calculate
4837
number of mismatches correctly. */
4839
query_bsp = BioseqLockById(query_id);
4842
for (i=0; i<numseg; i++) {
4843
align_length += dsp->lens[i];
4844
if (dsp->starts[2*i] != -1 && dsp->starts[2*i+1] != -1) {
4845
if (get_num_ident) {
4846
num_ident = BlastBioseqGetNumIdentical(query_bsp, subject_bsp,
4847
dsp->starts[2*i], dsp->starts[2*i+1],
4848
dsp->lens[i], dsp->strands[2*i],
4849
dsp->strands[2*i+1]);
4850
perc_ident += num_ident;
4851
num_mismatches += dsp->lens[i] - num_ident;
4857
perc_ident = perc_ident / align_length * 100;
4859
if (dsp->strands[0] != dsp->strands[1]) {
4860
q_start = dsp->starts[2*numseg-2] + 1;
4861
q_end = dsp->starts[0] + dsp->lens[0];
4862
s_end = dsp->starts[1] + 1;
4863
s_start = dsp->starts[2*numseg-1] + dsp->lens[numseg-1];
4865
q_start = dsp->starts[0] + 1;
4866
q_end = dsp->starts[2*numseg-2] + dsp->lens[numseg-1];
4867
s_start = dsp->starts[1] + 1;
4868
s_end = dsp->starts[2*numseg-1] + dsp->lens[numseg-1];
4870
} else if (sap->segtype == SAS_STD) {
4872
ssp = (StdSegPtr) sap->segs;
4875
sap_tmp->segtype = SAS_STD;
4876
sap_tmp->segs = ssp;
4877
GetScoreAndEvalue(sap_tmp, &score, &bit_score, &evalue, &number);
4878
ScoreAndEvalueToBuffers(bit_score, evalue,
4879
bit_score_buff, &eval_buff, 0);
4880
find_score_in_align(sap_tmp, 1, asp);
4882
find_score_in_align(sap, 1, asp);
4884
if (asp->m_frame < 0)
4885
q_start = SeqLocStop(ssp->loc) + 1;
4887
q_start = SeqLocStart(ssp->loc) + 1;
4889
if (asp->t_frame < 0)
4890
s_start = SeqLocStop(ssp->loc->next) + 1;
4892
s_start = SeqLocStart(ssp->loc->next) + 1;
4895
for (index=1; ssp->next; index++)
4897
num_gap_opens = index / 2;
4901
if (asp->m_frame < 0)
4902
q_end = SeqLocStart(ssp->loc) + 1;
4904
q_end = SeqLocStop(ssp->loc) + 1;
4906
if (asp->t_frame < 0)
4907
s_end = SeqLocStart(ssp->loc->next) + 1;
4909
s_end = SeqLocStop(ssp->loc->next) + 1;
4911
align_length = asp->totlen;
4912
num_mismatches = asp->totlen - asp->gaps - asp->identical;
4913
perc_ident = ((FloatHi) 100*asp->identical)/ (asp->totlen);
4914
} else if (sap->segtype == SAS_DENDIAG) {
4916
ddp = (DenseDiagPtr) sap->segs;
4917
sap_tmp->segtype = SAS_DENDIAG;
4918
sap_tmp->segs = ddp;
4919
GetScoreAndEvalue(sap_tmp, &score, &bit_score, &evalue, &number);
4920
ScoreAndEvalueToBuffers(bit_score, evalue,
4921
bit_score_buff, &eval_buff, 0);
4923
align_length = ddp->len;
4924
if (ddp->strands[0] == Seq_strand_minus) {
4925
q_start = ddp->starts[0] + align_length;
4926
q_end = ddp->starts[0] + 1;
4928
q_start = ddp->starts[0] + 1;
4929
q_end = ddp->starts[0] + align_length;
4932
if (ddp->strands[1] == Seq_strand_minus) {
4933
s_start = ddp->starts[1] + align_length;
4934
s_end = ddp->starts[1] + 1;
4936
s_start = ddp->starts[1] + 1;
4937
s_end = ddp->starts[1] + align_length;
4940
/* Query Bioseq is needed for calculating number of identities.
4941
NB: even if number of identities is already filled in the
4942
seqalign score list, that is not enough here, because we need to
4943
know number of identities in each segment in order to calculate
4944
number of mismatches correctly. */
4946
query_bsp = BioseqLockById(query_id);
4949
num_ident = BlastBioseqGetNumIdentical(query_bsp, subject_bsp,
4950
ddp->starts[0], ddp->starts[1], align_length,
4951
ddp->strands[0], ddp->strands[1]);
4952
num_mismatches = align_length - num_ident;
4953
perc_ident = ((FloatHi)num_ident) / align_length * 100;
4955
if (!is_translated) {
4956
/* Adjust coordinates if query and/or subject is a subsequence */
4963
if (perc_ident >= 99.995 && perc_ident < 100.00)
4967
"%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%s\t%s\n",
4968
query_buffer, subject_buffer, perc_ident, align_length,
4969
num_mismatches, num_gap_opens, q_start,
4970
q_end, s_start, s_end, eval_buff, bit_score_buff);
4971
old_subject_id = subject_id;
4972
if (sap->segtype == SAS_DENSEG)
4974
else if (sap->segtype == SAS_DENDIAG) {
4975
if ((ddp = ddp->next) == NULL)
4977
} else if (sap->segtype == SAS_STD) {
4978
if ((ssp = ssp->next) == NULL)
4984
eval_buff = MemFree(eval_buff);
4987
sap_tmp = MemFree(sap_tmp);
4989
if (is_translated) {
4990
free_default_matrix(asp->matrix);
4994
BioseqUnlock(subject_bsp);
4996
BioseqUnlock(query_bsp);
4999
4555
int LIBCALLBACK BlastPrintAlignInfo(VoidPtr srch)
5786
void PrintTabularOutputHeader(CharPtr blast_database, BioseqPtr query_bsp,
5787
SeqLocPtr query_slp, CharPtr blast_program,
5788
Int4 iteration, Boolean believe_query,
5791
Char buffer[BUFFER_LENGTH+1];
5792
Boolean unlock_bioseq = FALSE;
5794
asn2ff_set_output(outfp, NULL);
5796
ff_StartPrint(0, 0, BUFFER_LENGTH, NULL);
5798
if (blast_program) {
5799
CharPtr program = StringSave(blast_program);
5800
Nlm_StrUpper(program);
5801
sprintf(buffer, "# %s %s [%s]", program, BlastGetVersionNumber(),
5802
BlastGetReleaseDate());
5804
ff_AddString(buffer);
5808
if (iteration > 0) {
5809
ff_AddString("# Iteration: ");
5810
ff_AddString(Ltostr((long) iteration, 1));
5814
if (query_bsp || query_slp) {
5816
const CharPtr str = "# Query: ";
5817
Int4 string_length = StrLen(str);
5822
Int4 num_queries = ValNodeLen(query_slp);
5823
if (num_queries > 1) {
5824
/* Multiple queries: just print the number, without deflines. */
5825
sprintf(buffer, "%ld sequences", (long)num_queries);
5826
ff_AddString(buffer);
5828
query_bsp = BioseqLockById(SeqLocId(query_slp));
5829
unlock_bioseq = TRUE;
5833
if (query_bsp->id && believe_query) {
5834
SeqIdWrite(query_bsp->id, buffer, PRINTID_FASTA_LONG,
5836
if (StringNCmp(buffer, "lcl|", 4) == 0) {
5837
ff_AddString(buffer+4);
5839
ff_AddString(buffer);
5841
string_length += StrLen(buffer);
5843
string_length++; /* to account for the space above. */
5846
if ((title = BioseqGetTitle(query_bsp)) != NULL) {
5847
/* We do this to keep the entire title on one line
5848
(of length BUFFER_LENGTH). */
5849
StrNCpy(buffer, title, BUFFER_LENGTH - string_length);
5850
buffer[BUFFER_LENGTH - string_length] = NULLB;
5851
ff_AddString(buffer);
5855
BioseqUnlock(query_bsp);
5859
if (blast_database) {
5860
ff_AddString("# Database: ");
5861
ff_AddString(blast_database);
5864
if (getenv("PRINT_SEQUENCES")) {
5865
ff_AddString("# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score, query seq., subject seq.");
5867
ff_AddString("# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score");
5873
5343
Boolean FastaCheckDna(CharPtr seq)