788
void PrintTabularOutputHeader(CharPtr blast_database, BioseqPtr query_bsp,
789
SeqLocPtr query_slp, CharPtr blast_program,
790
Int4 iteration, Boolean believe_query,
793
Char buffer[BUFFER_LENGTH+1];
794
Boolean unlock_bioseq = FALSE;
796
asn2ff_set_output(outfp, NULL);
798
ff_StartPrint(0, 0, BUFFER_LENGTH, NULL);
801
CharPtr program = StringSave(blast_program);
802
Nlm_StrUpper(program);
803
sprintf(buffer, "# %s %s [%s]", program, BlastGetVersionNumber(),
804
BlastGetReleaseDate());
806
ff_AddString(buffer);
811
ff_AddString("# Iteration: ");
812
ff_AddString(Ltostr((long) iteration, 1));
816
if (query_bsp || query_slp) {
818
const CharPtr str = "# Query: ";
819
Int4 string_length = StrLen(str);
824
Int4 num_queries = ValNodeLen(query_slp);
825
if (num_queries > 1) {
826
/* Multiple queries: just print the number, without deflines. */
827
sprintf(buffer, "%ld sequences", (long)num_queries);
828
ff_AddString(buffer);
830
query_bsp = BioseqLockById(SeqLocId(query_slp));
831
unlock_bioseq = TRUE;
835
if (query_bsp->id && believe_query) {
836
SeqIdWrite(query_bsp->id, buffer, PRINTID_FASTA_LONG,
838
if (StringNCmp(buffer, "lcl|", 4) == 0) {
839
ff_AddString(buffer+4);
841
ff_AddString(buffer);
843
string_length += StrLen(buffer);
845
string_length++; /* to account for the space above. */
848
if ((title = BioseqGetTitle(query_bsp)) != NULL) {
849
/* We do this to keep the entire title on one line
850
(of length BUFFER_LENGTH). */
851
StrNCpy(buffer, title, BUFFER_LENGTH - string_length);
852
buffer[BUFFER_LENGTH - string_length] = NULLB;
853
ff_AddString(buffer);
857
BioseqUnlock(query_bsp);
861
if (blast_database) {
862
ff_AddString("# Database: ");
863
ff_AddString(blast_database);
866
if (getenv("PRINT_SEQUENCES")) {
867
ff_AddString("# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score, query seq., subject seq.");
869
ff_AddString("# Fields: Query id, Subject id, % identity, alignment length, mismatches, gap openings, q. start, q. end, s. start, s. end, e-value, bit score");
876
BlastBioseqGetNumIdentical(BioseqPtr q_bsp, BioseqPtr s_bsp, Int4 q_start,
877
Int4 s_start, Int4 length,
878
Uint1 q_strand, Uint1 s_strand)
880
SeqLocPtr q_slp, s_slp;
881
SeqPortPtr q_spp, s_spp;
885
if (!q_bsp || !s_bsp)
888
q_slp = SeqLocIntNew(q_start, q_start+length-1, q_strand, q_bsp->id);
889
s_slp = SeqLocIntNew(s_start, s_start+length-1, s_strand, s_bsp->id);
890
if (ISA_na(q_bsp->mol))
891
q_spp = SeqPortNewByLoc(q_slp, Seq_code_ncbi4na);
893
q_spp = SeqPortNewByLoc(q_slp, Seq_code_ncbistdaa);
894
if (ISA_na(s_bsp->mol))
895
s_spp = SeqPortNewByLoc(s_slp, Seq_code_ncbi4na);
897
s_spp = SeqPortNewByLoc(s_slp, Seq_code_ncbistdaa);
899
for (i=0; i<length; i++) {
900
while ((q_res = SeqPortGetResidue(q_spp)) != SEQPORT_EOF &&
902
while ((s_res = SeqPortGetResidue(s_spp)) != SEQPORT_EOF &&
904
if (q_res == SEQPORT_EOF || s_res == SEQPORT_EOF)
906
else if (q_res == s_res)
918
Function to print results in tab-delimited format, given a SeqAlign list.
919
q_shift and s_shift are the offsets in query and subject in case of a
922
void BlastPrintTabulatedResults(SeqAlignPtr seqalign, BioseqPtr query_bsp,
923
SeqLocPtr query_slp, Int4 num_alignments,
924
CharPtr blast_program, Boolean is_ungapped,
925
Boolean believe_query, Int4 q_shift,
926
Int4 s_shift, FILE *fp,
927
Boolean print_query_info)
929
BlastPrintTabulatedResultsEx(seqalign, query_bsp, query_slp, num_alignments,
930
blast_program, is_ungapped, believe_query,
931
q_shift, s_shift, fp, NULL, print_query_info);
934
void BlastPrintTabulatedResultsEx(SeqAlignPtr seqalign, BioseqPtr query_bsp,
935
SeqLocPtr query_slp, Int4 num_alignments,
936
CharPtr blast_program, Boolean is_ungapped,
937
Boolean believe_query, Int4 q_shift,
938
Int4 s_shift, FILE *fp,
939
int *num_formatted, Boolean print_query_info)
941
BlastPrintTabularResults(seqalign, query_bsp, query_slp, num_alignments,
942
blast_program, is_ungapped, FALSE, believe_query,
943
q_shift, s_shift, fp, num_formatted, print_query_info);
946
void BlastPrintTabularResults(SeqAlignPtr seqalign, BioseqPtr query_bsp,
947
SeqLocPtr query_slp, Int4 num_alignments, CharPtr blast_program,
948
Boolean is_ungapped, Boolean is_ooframe, Boolean believe_query,
949
Int4 q_shift, Int4 s_shift, FILE *fp, int *num_formatted,
950
Boolean print_query_info)
952
SeqAlignPtr sap, sap_tmp = NULL;
953
FloatHi perc_ident, bit_score, evalue;
954
Int4 numseg, num_gap_opens, num_mismatches, num_ident, score;
955
Int4 number, align_length, index, i;
956
Int4 q_start, q_end, s_start, s_end;
957
Char bit_score_buff[10];
959
Boolean is_translated;
960
SeqIdPtr query_id, old_query_id = NULL, subject_id, old_subject_id = NULL;
961
BioseqPtr subject_bsp=NULL;
962
Char query_buffer[BUFFER_LENGTH+1], subject_buffer[BUFFER_LENGTH+1];
964
StdSegPtr ssp = NULL;
965
DenseDiagPtr ddp = NULL;
966
AlignSumPtr asp = NULL;
967
CharPtr defline, title;
969
Int4 alignments_count;
971
is_translated = (StringCmp(blast_program, "blastn") &&
972
StringCmp(blast_program, "blastp"));
975
asp = MemNew(sizeof(AlignSum));
976
asp->matrix = load_default_matrix();
978
asp->ooframe = is_ooframe;
982
sap_tmp = SeqAlignNew();
986
query_id = query_bsp->id;
988
/* Evalue buffer is dynamically allocated to avoid compiler warnings
989
in calls to ScoreAndEvalueToBuffers. */
990
eval_buff = Malloc(10);
992
for (sap = seqalign; sap; sap = sap->next) {
994
query_id = TxGetQueryIdFromSeqAlign(sap);
995
if (SeqIdComp(query_id, old_query_id) != SIC_YES) {
996
if (old_query_id && num_formatted)
998
alignments_count = num_alignments;
999
/* New query: find the corresponding SeqLoc */
1000
while (slp && SeqIdComp(query_id, SeqLocId(slp)) != SIC_YES)
1003
query_id = old_query_id = SeqLocId(slp);
1004
/* Print new query information */
1005
if (print_query_info)
1006
PrintTabularOutputHeader(NULL, NULL, slp, NULL, 0,
1008
} else if (query_bsp)
1009
old_query_id = query_bsp->id;
1010
defline = (CharPtr) Malloc(BUFFER_LENGTH+1);
1011
SeqIdWrite(query_id, defline, PRINTID_FASTA_LONG, BUFFER_LENGTH);
1012
if (StringNCmp(defline, "lcl|", 4))
1013
StringCpy(query_buffer, defline);
1014
else if (!believe_query) {
1016
BioseqUnlock(query_bsp);
1017
query_bsp = BioseqLockById(query_id);
1019
if ((title = StringSave(BioseqGetTitle(query_bsp))) != NULL) {
1020
defline = MemFree(defline);
1021
defline = StringTokMT(title, " ", &title);
1022
StringNCpy_0(query_buffer, defline, BUFFER_LENGTH);
1023
defline = MemFree(defline);
1025
StringCpy(query_buffer, defline+4);
1026
defline = MemFree(defline);
1028
StringCpy(query_buffer, defline+4);
1030
query_id = old_query_id;
1032
subject_id = TxGetSubjectIdFromSeqAlign(sap);
1034
if (SeqIdComp(subject_id, old_subject_id) != SIC_YES) {
1035
/* New subject sequence has been found in the seqalign list */
1036
if (--alignments_count < 0)
1038
BioseqUnlock(subject_bsp);
1039
subject_bsp = BioseqLockById(subject_id);
1041
if (!subject_bsp || !subject_bsp->id)
1043
if (subject_bsp->id->choice != SEQID_GENERAL ||
1044
StringCmp(((DbtagPtr)subject_id->data.ptrvalue)->db, "BL_ORD_ID")) {
1045
defline = (CharPtr) Malloc(BUFFER_LENGTH+1);
1046
SeqIdWrite(subject_bsp->id, defline, PRINTID_FASTA_LONG, BUFFER_LENGTH);
1047
if (StringNCmp(defline, "lcl|", 4))
1048
StringCpy(subject_buffer, defline);
1050
StringCpy(subject_buffer, defline+4);
1052
defline = StringSave(BioseqGetTitle(subject_bsp));
1053
defline = StringTokMT(defline, " \t", &title);
1054
StringCpy(subject_buffer, defline);
1056
defline = MemFree(defline);
1064
GetScoreAndEvalue(sap, &score, &bit_score, &evalue, &number);
1066
/* Do not allow knocking off digit in evalue buffer, so parsers are
1068
ScoreAndEvalueToBuffers(bit_score, evalue,
1069
bit_score_buff, &eval_buff, 0);
1071
/* Loop on segments within this seqalign (in ungapped case) */
1073
if (sap->segtype == SAS_DENSEG) {
1074
Boolean get_num_ident = TRUE;
1075
dsp = (DenseSegPtr) sap->segs;
1076
numseg = dsp->numseg;
1077
/* Query Bioseq is needed for calculating number of identities.
1078
NB: even if number of identities is already filled in the
1079
seqalign score list, that is not enough here, because we need to
1080
know number of identities in each segment in order to calculate
1081
number of mismatches correctly. */
1083
query_bsp = BioseqLockById(query_id);
1086
for (i=0; i<numseg; i++) {
1087
align_length += dsp->lens[i];
1088
if (dsp->starts[2*i] != -1 && dsp->starts[2*i+1] != -1) {
1089
if (get_num_ident) {
1090
num_ident = BlastBioseqGetNumIdentical(query_bsp, subject_bsp,
1091
dsp->starts[2*i], dsp->starts[2*i+1],
1092
dsp->lens[i], dsp->strands[2*i],
1093
dsp->strands[2*i+1]);
1094
perc_ident += num_ident;
1095
num_mismatches += dsp->lens[i] - num_ident;
1101
perc_ident = perc_ident / align_length * 100;
1103
if (dsp->strands[0] != dsp->strands[1]) {
1104
q_start = dsp->starts[2*numseg-2] + 1;
1105
q_end = dsp->starts[0] + dsp->lens[0];
1106
s_end = dsp->starts[1] + 1;
1107
s_start = dsp->starts[2*numseg-1] + dsp->lens[numseg-1];
1109
q_start = dsp->starts[0] + 1;
1110
q_end = dsp->starts[2*numseg-2] + dsp->lens[numseg-1];
1111
s_start = dsp->starts[1] + 1;
1112
s_end = dsp->starts[2*numseg-1] + dsp->lens[numseg-1];
1114
} else if (sap->segtype == SAS_STD) {
1116
ssp = (StdSegPtr) sap->segs;
1119
sap_tmp->segtype = SAS_STD;
1120
sap_tmp->segs = ssp;
1121
GetScoreAndEvalue(sap_tmp, &score, &bit_score, &evalue, &number);
1122
ScoreAndEvalueToBuffers(bit_score, evalue,
1123
bit_score_buff, &eval_buff, 0);
1124
find_score_in_align(sap_tmp, 1, asp);
1126
find_score_in_align(sap, 1, asp);
1128
if (asp->m_frame < 0)
1129
q_start = SeqLocStop(ssp->loc) + 1;
1131
q_start = SeqLocStart(ssp->loc) + 1;
1133
if (asp->t_frame < 0)
1134
s_start = SeqLocStop(ssp->loc->next) + 1;
1136
s_start = SeqLocStart(ssp->loc->next) + 1;
1139
for (index=1; ssp->next; index++)
1141
num_gap_opens = index / 2;
1145
if (asp->m_frame < 0)
1146
q_end = SeqLocStart(ssp->loc) + 1;
1148
q_end = SeqLocStop(ssp->loc) + 1;
1150
if (asp->t_frame < 0)
1151
s_end = SeqLocStart(ssp->loc->next) + 1;
1153
s_end = SeqLocStop(ssp->loc->next) + 1;
1155
align_length = asp->totlen;
1156
num_mismatches = asp->totlen - asp->gaps - asp->identical;
1157
perc_ident = ((FloatHi) 100*asp->identical)/ (asp->totlen);
1158
} else if (sap->segtype == SAS_DENDIAG) {
1160
ddp = (DenseDiagPtr) sap->segs;
1161
sap_tmp->segtype = SAS_DENDIAG;
1162
sap_tmp->segs = ddp;
1163
GetScoreAndEvalue(sap_tmp, &score, &bit_score, &evalue, &number);
1164
ScoreAndEvalueToBuffers(bit_score, evalue,
1165
bit_score_buff, &eval_buff, 0);
1167
align_length = ddp->len;
1168
if (ddp->strands[0] == Seq_strand_minus) {
1169
q_start = ddp->starts[0] + align_length;
1170
q_end = ddp->starts[0] + 1;
1172
q_start = ddp->starts[0] + 1;
1173
q_end = ddp->starts[0] + align_length;
1176
if (ddp->strands[1] == Seq_strand_minus) {
1177
s_start = ddp->starts[1] + align_length;
1178
s_end = ddp->starts[1] + 1;
1180
s_start = ddp->starts[1] + 1;
1181
s_end = ddp->starts[1] + align_length;
1184
/* Query Bioseq is needed for calculating number of identities.
1185
NB: even if number of identities is already filled in the
1186
seqalign score list, that is not enough here, because we need to
1187
know number of identities in each segment in order to calculate
1188
number of mismatches correctly. */
1190
query_bsp = BioseqLockById(query_id);
1193
num_ident = BlastBioseqGetNumIdentical(query_bsp, subject_bsp,
1194
ddp->starts[0], ddp->starts[1], align_length,
1195
ddp->strands[0], ddp->strands[1]);
1196
num_mismatches = align_length - num_ident;
1197
perc_ident = ((FloatHi)num_ident) / align_length * 100;
1199
if (!is_translated) {
1200
/* Adjust coordinates if query and/or subject is a subsequence */
1207
if (perc_ident >= 99.995 && perc_ident < 100.00)
1211
"%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%s\t%s\n",
1212
query_buffer, subject_buffer, perc_ident, align_length,
1213
num_mismatches, num_gap_opens, q_start,
1214
q_end, s_start, s_end, eval_buff, bit_score_buff);
1215
old_subject_id = subject_id;
1216
if (sap->segtype == SAS_DENSEG)
1218
else if (sap->segtype == SAS_DENDIAG) {
1219
if ((ddp = ddp->next) == NULL)
1221
} else if (sap->segtype == SAS_STD) {
1222
if ((ssp = ssp->next) == NULL)
1228
eval_buff = MemFree(eval_buff);
1231
sap_tmp = MemFree(sap_tmp);
1233
if (is_translated) {
1234
free_default_matrix(asp->matrix);
1238
BioseqUnlock(subject_bsp);
1240
BioseqUnlock(query_bsp);
1245
/* Mutex for assignment of db seqs to search. */
1246
TNlmMutex err_message_mutex=NULL;
1248
#define BLAST_ERROR_BULEN 50
1250
The following functions fill a the Error user string with
1251
text to identify BLAST and the entry being worked on.
1252
The SeqIdPtr is used to make a FASTA id, which is appended
1255
A Uint1 is returned, which allows Nlm_ErrUserDelete to delete
1256
this error string when it's done.
1260
BlastSetUserErrorString(CharPtr string, SeqIdPtr sip, Boolean use_id)
1264
Char buffer[2*BLAST_ERROR_BULEN+1], textid[BLAST_ERROR_BULEN+1];
1265
CharPtr buf_start, ptr, title;
1266
Int2 length=0, index;
1270
ptr = buf_start = &buffer[0];
1273
StringNCpy_0(ptr, string, BLAST_ERROR_BULEN);
1277
bsp = BioseqLockById(sip);
1283
title = BioseqGetTitle(bsp);
1288
length = StringLen(string);
1289
if (length > BLAST_ERROR_BULEN)
1290
length = BLAST_ERROR_BULEN;
1297
SeqIdWrite(sip, textid, PRINTID_FASTA_LONG, BLAST_ERROR_BULEN-1);
1298
StringNCpy_0(ptr, textid, BLAST_ERROR_BULEN-1);
1302
for (index=0; index<BLAST_ERROR_BULEN-1; index++)
1304
if (title[index] == NULLB || title[index] == ' ')
1308
*ptr = title[index];
1314
StringCpy(ptr+StringLen(ptr), ":");
1316
NlmMutexLockEx(&err_message_mutex);
1317
retval = Nlm_ErrUserInstall (buf_start, 0);
1318
NlmMutexUnlock(err_message_mutex);
1324
BlastDeleteUserErrorString(Uint1 err_id)
1327
NlmMutexLockEx(&err_message_mutex);
1328
Nlm_ErrUserDelete(err_id);
1329
NlmMutexUnlock(err_message_mutex);