1
/* $Id: wwwbutl.c,v 6.27 2001/09/06 20:24:34 dondosha Exp $
2
* ===========================================================================
5
* National Center for Biotechnology Information
7
* This software/database is a "United States Government Work" under the
8
* terms of the United States Copyright Act. It was written as part of
9
* the author's official duties as a United States Government employee and
10
* thus cannot be copyrighted. This software/database is freely available
11
* to the public for use. The National Library of Medicine and the U.S.
12
* Government have not placed any restriction on its use or reproduction.
14
* Although all reasonable efforts have been taken to ensure the accuracy
15
* and reliability of the software and data, the NLM and the U.S.
16
* Government do not and cannot warrant the performance or results that
17
* may be obtained by using this software or data. The NLM and the U.S.
18
* Government disclaim all warranties, express or implied, including
19
* warranties of performance, merchantability or fitness for any particular
22
* Please cite the author in any work or product based on this material.
24
* ===========================================================================
26
* File Name: $RCSfile: wwwbutl.c,v $
28
* Author: Sergei Shavirin
30
* Initial Version Creation Date: 04/21/2000
35
* WWW BLAST/PSI/PHI utilities
38
* Revision 6.27 2001/09/06 20:24:34 dondosha
39
* Removed threshold_first
41
* Revision 6.26 2001/07/20 19:56:04 dondosha
42
* Scale cutoff_s2 for megablast if match reward not 1
44
* Revision 6.25 2001/02/16 15:53:17 dondosha
47
* Revision 6.24 2001/01/05 18:18:28 dondosha
48
* Change reward and penalty scores depending on percent identity
50
* Revision 6.23 2001/01/05 18:07:15 dondosha
51
* Added handling of word size and percent identity fields for options creation
53
* Revision 6.22 2000/11/16 22:35:40 dondosha
54
* Added lower case masking option and endpoints results option
56
* Revision 6.21 2000/10/31 20:21:26 shavirin
57
* Fixed bug with RedoAlignmentCore filtering.
59
* Revision 6.20 2000/10/16 22:18:35 shavirin
60
* Added possibility to perform OOF blastx
62
* Revision 6.19 2000/10/16 20:26:35 shavirin
63
* Added possibility to rum RPS Blast.
65
* Revision 6.18 2000/09/28 16:32:53 dondosha
66
* Compiler warning fix
68
* Revision 6.17 2000/09/27 22:18:03 shavirin
69
* Added possibility to limit search to results of entrez query.
71
* Revision 6.16 2000/09/12 22:01:42 dondosha
72
* Use matrix returned from search during formatting
74
* Revision 6.15 2000/09/08 20:16:59 dondosha
75
* Print informative error messages for bad accessions, still do search if at least one accession is good
77
* Revision 6.14 2000/09/08 17:46:54 dondosha
78
* Allow multiple accessions in input
80
* Revision 6.13 2000/09/07 18:02:58 dondosha
81
* If query has many sequences, put them in a SeqLoc list
83
* Revision 6.12 2000/09/01 21:47:34 dondosha
84
* Make check for wordsize too small; add 4 to user-supplied wordsize for megablast
86
* Revision 6.11 2000/09/01 17:50:59 dondosha
87
* No part of SeqEntry can be freed before the very end
89
* Revision 6.10 2000/09/01 17:30:26 dondosha
90
* Small corrections for megablast with Entrez client
92
* Revision 6.9 2000/08/30 22:20:24 dondosha
93
* Small changes for megablast web page
95
* Revision 6.8 2000/08/28 20:17:42 dondosha
96
* Added functionality for megablast web page
98
* Revision 6.7 2000/08/10 18:17:19 shavirin
99
* Used correct (fake) Bioseq in printing alignmenets.
101
* Revision 6.6 2000/08/10 14:50:37 shavirin
102
* Fixed 64 dependent bug
104
* Revision 6.4 2000/08/09 20:49:01 shavirin
105
* Added support for S&W Blast and XML output.
107
* Revision 6.3 2000/07/31 20:39:23 shavirin
108
* Some formating changes from Haruna Cofer (haruna@detroit.sgi.com)
110
* Revision 6.2 2000/07/26 02:26:16 shavirin
111
* Changes in accordance to Alejandro's S&W Blast.
113
* Revision 6.1 2000/05/17 15:53:40 shavirin
117
* ==========================================================================
120
#include <wwwblast.h>
122
static Int4 GlobalAlignNumber = 0; /* For PSI Blast printing ONLY !*/
124
void WWWBlastInfoFree(WWWBlastInfoPtr theInfo)
127
WWWInfoFree(theInfo->info);
128
BLASTOptionDelete(theInfo->options);
129
MemFree(theInfo->database);
130
MemFree(theInfo->program);
132
for(i = 0; i < MAX_DB_NUM; i++) {
133
MemFree(theInfo->blast_config->allow_db[i]);
135
MemFree(theInfo->blast_config);
137
/* if(!theInfo->believe_query)
138
fake_bsp = BlastDeleteFakeBioseq(fake_bsp); */
140
MemFree(theInfo->blast_type);
141
MemFree(theInfo->ConfigFile);
142
SeqLocSetFree(theInfo->query_slp);
148
void WWWBlastErrMessage(BLASTErrCode error_code, CharPtr error_msg)
150
WWWBlastErrMessageEx(error_code, error_msg, NULL);
153
void WWWBlastErrMessageEx(BLASTErrCode error_code, CharPtr error_msg,
156
CharPtr delim = "<BR>";
158
if(error_code == BLASTNoError)
161
fprintf(stdout, "<FONT color=red><h3>");
162
fprintf(stdout, "Error %ld in submitting BLAST query", labs(error_code));
164
fprintf(stdout, "</h3></FONT> <BR> <b> Accession: %s </b> <BR> <HR>\n<b>", seq_info);
166
fprintf(stdout, "</h3></FONT><HR>\n<b>");
167
fprintf(stdout, "Short error description:");
169
fprintf(stdout, "</b><BR><BR>\n");
175
"Your input sequence may not be found in Entrez %s"
176
"or Entrez access interface currently unavailable. %s"
177
"Please send message to blast_help@ncbi.nlm.nih.gov %s"
178
"with description of your query",
179
delim, delim, delim);
185
"Your input sequence formatted incorrectly. %s"
186
"Please read blast help if you have problems with formatting %s"
187
"or send request to blast help account.",
191
case BLASTErrNoSequence:
194
"Input sequence for the BLAST search, probably missing. %s"
195
"Please see the blast help for a description %s"
196
"of the FASTA sequence format.",
200
case BLASTErrCombination:
203
"The combination of database and program, that you provided in your %s"
204
"message is invalid or not acceptable by BLAST search system. %s"
205
"Please look at current possible combinations in BLAST help. ",
209
case BLASTErrAccType:
212
"You specified a protein (or nucleotide) sequence identifier, %s"
213
"but a nucleotide (or protein) sequence is required for your search.",
218
case BLASTErrDatalib:
220
fprintf(stdout, "No database was specified. ");
223
case BLASTErrNoQueue:
225
"Unable to accept more BLAST jobs right now, %s"
226
"Queue overloaded. Please try again later.",
232
if(error_msg != NULL) {
233
fprintf(stdout, "%s", error_msg);
240
if(error_msg != NULL) {
241
fprintf(stdout, "%s %s", error_msg, delim);
244
"There were some internal software problems while processing %s"
245
"your request. Please contact blast support with a full %s"
246
"description of your query to BLAST as soon as possible.",
252
fprintf(stdout, "\n<HR>\n");
260
Boolean BLAST_Time(CharPtr string, Int4 len, time_t seconds)
264
if(string == NULL || len < 25)
271
if((chptr = ctime(&seconds)) != NULL)
272
StringCpy(string, chptr);
278
WWWBlastInfoPtr WWWBlastReadArgs(CharPtr type)
280
WWWBlastInfoPtr theInfo;
281
WWWErrorCode error = WWWErrOk;
283
CharPtr blast_type, hostname, chptr;
287
theInfo = MemNew(sizeof(WWWBlastInfo));
289
if((error = WWWGetArgs(&theInfo->info)) != WWWErrOk) {
290
WWWBlastErrMessage(BLASTMiscError, NULL);
294
if((chptr = WWWGetQuery(theInfo->info)) == NULL || *chptr == NULLB) {
295
fprintf(stdout, "<META HTTP-EQUIV=\"Refresh\" "
296
"CONTENT=\"2; URL=%s.html\">", type ? type : "blast");
300
#ifdef PRINT_ALL_INPUT /* Printing out all coming data for debugging */
301
for(index= 0; index < WWWGetNumEntries(theInfo->info); index ++) {
302
printf("%s : %s<BR>",
303
WWWGetNameByIndex(theInfo->info, index),
304
WWWGetValueByIndex(theInfo->info, index));
308
if(getenv("DEBUG_COMMAND_LINE") != NULL) {
310
fd = FileOpen("/tmp/__web.in", "w");
311
fprintf(fd, "%s", ((WWWInfoDataPtr)theInfo->info)->query);
315
/* Root path for PSI/PHI Blast images */
316
theInfo->www_root_path = getenv("WWW_ROOT_PATH"); /* May be NULL */
318
if ( !ErrSetLogfile ("/dev/null", ELOG_APPEND) ) {
319
fprintf(stdout, "Cannot set logfile exiting....\n");
322
ErrSetOpts (ERR_CONTINUE, ERR_LOG_ON);
325
/* Config file with program/database relationsship */
327
blast_type = WWWGetValueByName(theInfo->info, "WWW_BLAST_TYPE");
329
if(blast_type == NULL || *blast_type == NULLB) {
330
theInfo->blast_type = StringSave(type ? type : "blast");
331
sprintf(tmp, "%s.rc", theInfo->blast_type);
332
theInfo->ConfigFile = StringSave(tmp);
334
sprintf(tmp, "%s.rc", blast_type);
335
theInfo->blast_type = StringSave(blast_type);
336
theInfo->ConfigFile = StringSave(tmp);
339
sprintf(tmp, "%s.log", blast_type == NULL? "wwwblast" : blast_type);
341
log_fd = FileOpen(tmp, "a");
343
if(log_fd == NULL) /* If log_fd == NULL - no problem */
346
BLAST_Time(tmp, sizeof(tmp), 0);
348
if((hostname = getenv("PROXIED_IP")) == NULL)
349
hostname = WWWGetAddress(theInfo->info);
351
fprintf(log_fd, "\n%d|%s|%s|%s",
352
getpid(), tmp, hostname == NULL? "host_not_set" : hostname,
353
WWWGetAgent(theInfo->info));
359
static Int4 GetLetterIndex(CharPtr letters, Char ch)
363
for(index = 0; letters[index] != NULLB; index++) {
364
if (letters[index] == ch) {
370
static Boolean ParseInputString(CharPtr string,
372
CharPtr PNTR *values_in,
373
CharPtr PNTR ErrorMessage)
376
Int4 i, index = 0, max_par_num;
381
if(string == NULL || letters == NULL ||
382
*letters == '\0' || values_in == NULL) {
386
max_par_num = strlen(letters);
388
values = (CharPtr PNTR)MemNew(max_par_num * sizeof(CharPtr));
394
while(IS_WHITESP(*chptr)) /* Rolling spaces */
397
if(*chptr == NULLB) /* Check for NULLB */
400
if (*chptr != '-') { /* Check for the option sign */
401
sprintf(message, "Invalid input string started from \"%s\"",
403
*ErrorMessage = StringSave(message);
409
/* checking index in options table */
411
if((index = GetLetterIndex(letters, *chptr)) < 0) {
412
sprintf(message, "Character \'%c\' is not a valid option",
414
*ErrorMessage = StringSave(message);
418
if(*chptr == NULLB) /* Check for NULLB */
421
while(!IS_WHITESP(*chptr)) { /* Finding first space */
422
if(*chptr == NULLB) /* Check for NULLB */
427
while(IS_WHITESP(*chptr)) /* Rolling spaces */
430
if(*chptr == NULLB) /* Check for NULLB */
433
for(i=0; !IS_WHITESP(*chptr) && *chptr != NULLB; i++, chptr++) {
439
MemFree(values[index]);
440
values[index] = StringSave(option);
445
/* Set of functions to handle BLAST custom configuration file */
446
static BLASTConfigPtr BLASTConfigNew(void)
448
BLASTConfigPtr config;
450
if((config = (BLASTConfigPtr) MemNew(sizeof(BLASTConfig))) == NULL)
453
config->run_max = DEFAULT_RUN_MAX;
454
config->queue_max = DEFAULT_QUEUE_MAX;
455
config->num_cpu = NUM_CPU_TO_USE;
456
MemSet(config->allow_db, 0, sizeof(CharPtr)*MAX_DB_NUM);
460
static Int4 BLASTEatWs (FILE* fp)
464
while ((ch = fgetc (fp)) != EOF) {
465
if (ch != ' ' && ch != '\t')
471
static void BLASTConfigGetWord(CharPtr word, CharPtr line)
475
for(x=0; line[x] && IS_WHITESP(line[x]); x++);
478
if(!(word[y] = line[x]))
480
if(IS_WHITESP(line[x]))
481
if((!x) || (line[x-1] != '\\'))
483
if(line[x] != '\\') ++y;
488
while(line[x] && IS_WHITESP(line[x])) ++x;
490
for(y=0;(line[y] = line[x]);++x,++y);
492
static Int4 BLASTConfigGetLine (CharPtr s, Int4 n, FILE* fp)
499
if (ch == EOF || ch == '\n' || (len == n-1)) {
500
if (len && s[len - 1] == ' ') s[len - 1] = '\0';
502
return feof(fp) ? 1 : 0;
507
if (ch == '\t' || ch == ' ') {
514
#define MAX_LINE_SIZE 2048
515
static BLASTConfigPtr BLASTReadConfigFile(CharPtr filename, CharPtr program)
518
BLASTConfigPtr config;
519
Char line[MAX_LINE_SIZE], word[MAX_LINE_SIZE];
525
if((config = BLASTConfigNew()) == NULL)
528
if((fd = FileOpen(filename, "r")) == NULL)
531
while(!(BLASTConfigGetLine(line, MAX_LINE_SIZE, fd))) {
532
if((line[0] != '#') && (line[0] != '\0')) {
533
BLASTConfigGetWord(word, line);
535
if(!StringICmp(word, "RunMaxProcesses") &&
536
(value = atoi(line)) != 0) {
537
config->run_max = value;
538
} else if(!StringICmp(word, "QueueMaxJobs") &&
539
(value = atoi(line)) != 0) {
540
config->queue_max = value;
541
} else if(!StringICmp(word, "NumCpuToUse") &&
542
(value = atoi(line)) != 0) {
543
config->num_cpu = value;
544
} else if(!StringICmp(word, "NiceValue") &&
545
(value = atoi(line)) != 0) {
546
config->niceval = value;
547
} else if(!StringICmp(word, program)) {
548
for(i = 0 ; line[0] != NULLB && i < MAX_DB_NUM; i++) {
549
BLASTConfigGetWord(word, line);
550
config->allow_db[i] = StringSave(word);
560
static Boolean ValidateCombinationsEx(WWWBlastInfoPtr theInfo,
565
for(i = 0; theInfo->blast_config->allow_db[i] != NULL; i++) {
566
if(!StringICmp(database, theInfo->blast_config->allow_db[i]))
572
/* This will work if search require to choose few databases */
573
static Boolean WWWParseDatabases(WWWBlastInfoPtr theInfo)
576
Boolean done, datalib_found;
577
Char buffer[4096], buffer1[4096]; /* is 4096 always long enough? XXX */
580
count = WWWGetNumEntries(theInfo->info);
581
datalib_found = FALSE;
584
for (index=0; index<count; index++) {
585
chptr = WWWGetNameByIndex(theInfo->info, index);
586
if (StringCmp(chptr, "DATALIB") == 0) {
587
datalib_found = TRUE;
588
chptr = WWWGetValueByIndex(theInfo->info, index);
591
/* Parse string if multiple database names. */
592
while (done == FALSE) {
593
done = readdb_parse_db_names(&chptr, buffer1);
594
if (ValidateCombinationsEx(theInfo, buffer1) == TRUE) {
596
CharPtr prefix = WWWGetValueByName(theInfo->info, "DB_DIR_PREFIX");
600
sprintf(tmpbuf, "%s%c%s", prefix, DIRDELIMCHR, buffer1);
602
sprintf(tmpbuf, "%s", buffer1);
605
StringCpy(ptr, tmpbuf);
606
ptr += StringLen(tmpbuf);
609
WWWBlastErrMessage(BLASTErrCombination, NULL);
619
theInfo->database = StringSave(buffer);
621
WWWBlastErrMessage(BLASTErrDatalib, NULL);
625
/* Processing database aliases */
627
if(StringStr(theInfo->database, "E.coli") != NULL) {
628
MemFree(theInfo->database);
629
theInfo->database = StringSave("ecoli");
634
#if defined(NCBI_CLIENT_SERVER) || defined (NCBI_ENTREZ_CLIENT)
636
static Int4 AccessionToGi (CharPtr string)
648
for(chptr = string, digit = TRUE; *chptr != NULLB; chptr++) {
649
if(!IS_DIGIT(*chptr)) {
656
if((gi = atol(string)) > 0)
660
/* all letters in accesion should be upper */
661
string = Nlm_StringUpper(string);
665
if((sip = ValNodeNew (NULL)) == NULL)
668
index = 0; version = 0;
669
while (*string != '\0' && index < 16) {
672
buffer[index] = *string;
677
buffer[index] = '\0';
678
if (*string == '.' && *(string+1) != '\0') {
679
sscanf((string+1), "%ld", &tmplong);
680
version = (Int2) tmplong;
683
if((tsip = TextSeqIdNew ()) == NULL)
686
tsip->accession = StringSave(buffer);
687
tsip->version = version;
689
/* GenBank, EMBL, and DDBJ. */
690
sip->choice = SEQID_GENBANK;
691
sip->data.ptrvalue = (Pointer) tsip;
692
gi = ID1FindSeqId (sip);
696
sip->choice = SEQID_SWISSPROT;
697
gi = ID1FindSeqId (sip);
704
sip->choice = SEQID_PIR;
705
gi = ID1FindSeqId (sip);
712
sip->choice = SEQID_PRF;
713
gi = ID1FindSeqId (sip);
719
/* OTHER, probably 'ref' */
720
sip->choice = SEQID_OTHER;
721
gi = ID1FindSeqId (sip);
727
/* OK. We failed to find gi using string as TextSeqId. Now trying
728
last time - with PDBSeqIdPtr */
730
if((psip = PDBSeqIdNew()) == NULL)
733
sip->choice = SEQID_PDB;
734
sip->data.ptrvalue = psip;
736
psip->mol = StringSave(buffer);
737
psip->chain = version;
739
gi = ID1FindSeqId (sip);
749
#define MAX_NUM_QUERIES 16383 /* == 1/2 INT2_MAX */
750
Boolean WWWCreateSearchOptions(WWWBlastInfoPtr theInfo)
752
CharPtr chptr, ptr, sequence, outptr;
755
CharPtr opt_str = "GErqeWvbKLY";
756
BLAST_OptionsBlkPtr options;
759
Boolean is_megablast = FALSE, mask_lower_case = FALSE;
760
SeqLocPtr query_slp = NULL, query_lcase_mask = NULL;
764
if((chptr = WWWGetValueByName(theInfo->info, "PROGRAM")) != NULL) {
765
theInfo->program = StringSave(chptr);
767
theInfo->program = StringSave("blastn");
768
/*WWWBlastErrMessage(BLASTErrProgram, NULL);
772
/* Is it MEGABLAST? */
774
if (WWWGetValueByName(theInfo->info, "MEGABLAST") != NULL)
777
/* Configuration file set program/database relations */
779
if((theInfo->blast_config =
780
BLASTReadConfigFile(theInfo->ConfigFile, theInfo->program)) == NULL) {
781
WWWBlastErrMessage(BLASTConfigFile, NULL);
786
if(!WWWParseDatabases(theInfo))
789
/* SEQUENCE or SEQFILE */
791
if((sequence = WWWGetValueByName(theInfo->info, "SEQUENCE")) == NULL ||
792
sequence[0] == NULLB) {
793
if((sequence = WWWGetValueByName(theInfo->info, "SEQFILE")) == NULL ||
794
sequence[0] == NULLB) {
795
WWWBlastErrMessage(BLASTErrNoSequence, NULL);
800
theInfo->align_type = BlastGetTypes(theInfo->program, &theInfo->query_is_na, &theInfo->db_is_na);
801
#if defined(NCBI_CLIENT_SERVER) || defined (NCBI_ENTREZ_CLIENT)
803
if((chptr = WWWGetValueByName(theInfo->info, "INPUT_TYPE")) != NULL &&
804
!StringNICmp(chptr, "Accession", 9)) {
806
Int4 i, gi, number, title_length, id_length, query_num = 0;
807
CharPtr accession, new_defline;
808
BioseqPtr bsp_tmp, query_bsp;
812
Int2 retval, buf_length=512;
815
Boolean first_query = TRUE;
818
/* This is request by Accession/GI - asking ID */
820
if (!ID1BioseqFetchEnable("web-blasts", TRUE)) {
821
WWWBlastErrMessage(BLASTEntrez, NULL);
826
theInfo->query_slp = NULL;
827
StrCpy(delimiters, " \t\n,;\r");
829
while (outptr = StringTokMT(chptr, delimiters, &chptr)) {
833
/* Strip off non-alphanumerics, except for '_' (used in SP) and '.' (soon to be used by the collaboration. */
834
while (IS_ALPHANUM(*outptr) || *outptr == '_' || *outptr == '.')
839
gi = AccessionToGi(accession);
842
ValNodeAddInt(&sip, SEQID_GI, gi);
844
WWWBlastErrMessageEx(BLASTEntrez, NULL, accession);
845
if (!theInfo->query_slp && !chptr)
850
/* If id is not found - it is not found - ID1 is down? */
852
if((bsp_tmp = BioseqLockById(sip)) == NULL) {
853
WWWBlastErrMessageEx(BLASTEntrez, NULL, accession);
854
if (!theInfo->query_slp && !chptr)
859
if (ISA_na(bsp_tmp->mol) != theInfo->query_is_na) {
860
WWWBlastErrMessageEx(BLASTErrAccType, NULL, accession);
861
if (!theInfo->query_slp && !chptr)
866
query_bsp = BioseqNew();
867
query_bsp->length = bsp_tmp->length;
868
query_bsp->mol = bsp_tmp->mol;
869
query_bsp->repr = Seq_repr_raw;
870
query_bsp->seq_data = BSNew(query_bsp->length);
872
if (ISA_na(query_bsp->mol)) {
873
spp = SeqPortNew(bsp_tmp, 0, -1, Seq_strand_plus,
875
query_bsp->seq_data_type = Seq_code_iupacna;
877
spp = SeqPortNew(bsp_tmp, 0, -1, Seq_strand_unknown,
879
query_bsp->seq_data_type = Seq_code_ncbieaa;
882
SeqPortSet_do_virtual(spp, TRUE);
884
while (number < query_bsp->length) {
885
retval = SeqPortRead(spp, buf, buf_length);
888
BSWrite(query_bsp->seq_data, buf, retval);
894
title_length = StringLen(BioseqGetTitle(bsp_tmp));
895
SeqIdWrite(bsp_tmp->id, tmp, PRINTID_FASTA_LONG, 255);
896
id_length = StringLen(tmp);
897
title_length += id_length;
899
new_defline = (CharPtr) MemNew(title_length*sizeof(Char));
900
StringCpy(new_defline, tmp);
901
*(new_defline+id_length) = ' ';
902
StringCpy(new_defline+id_length+1, BioseqGetTitle(bsp_tmp));
903
*(new_defline+title_length-1) = NULLB;
904
query_bsp->descr = ValNodeAddStr(NULL, Seq_descr_title,
906
query_bsp->id = ValNodeNew(NULL);
909
oid->str = (CharPtr) Malloc(6);
910
sprintf(oid->str, "%d", query_num);
911
query_bsp->id->choice = SEQID_LOCAL;
912
query_bsp->id->data.ptrvalue = (Pointer) oid;
914
SeqMgrDeleteFromBioseqIndex(bsp_tmp);
916
BioseqUnlock(bsp_tmp);
918
BioseqPack(query_bsp);
920
theInfo->query_bsp = query_bsp;
924
ValNodeAddPointer(&theInfo->query_slp, SEQLOC_WHOLE,
925
SeqIdSetDup(SeqIdFindBest(query_bsp->id,
929
ID1BioseqFetchDisable();
934
if (WWWGetValueByName(theInfo->info, "LCASE_MASK") != NULL)
935
mask_lower_case = TRUE;
938
/* Creating Bioseq */
940
if (theInfo->query_bsp == NULL) {
942
BioseqPtr query_bsp = NULL;
943
SeqLocPtr last_mask, mask_slp;
949
SeqMgrHoldIndexing(TRUE);
950
mask_slp = last_mask = NULL;
952
while ((sep=FastaToSeqBuffForDb(sequence, &outptr,
953
theInfo->query_is_na, NULL,
954
theInfo->believe_query,
955
prefix, &ctr, &mask_slp)) != NULL) {
958
if (!mask_lower_case)
959
SeqLocFree(mask_slp);
961
query_lcase_mask = last_mask = mask_slp;
963
last_mask->next = mask_slp;
964
last_mask = last_mask->next;
969
if (theInfo->query_is_na)
970
SeqEntryExplore(sep, &query_bsp, FindNuc);
972
SeqEntryExplore(sep, &query_bsp, FindProt);
974
if (query_bsp == NULL) {
975
ErrPostEx(SEV_FATAL, 0, 0, "Unable to obtain bioseq\n");
979
if (num_queries > MAX_NUM_QUERIES) {
980
WWWBlastErrMessage(BLASTOptionStr,
981
"Maximal number of queries exceeded. At most 16383 are allowed");
984
if (!theInfo->query_slp) /* I.e. if first query */
985
theInfo->query_bsp = query_bsp;
987
ValNodeAddPointer(&theInfo->query_slp, SEQLOC_WHOLE,
988
SeqIdSetDup(SeqIdFindBest(query_bsp->id,
991
SeqMgrHoldIndexing(FALSE);
992
} else if (is_megablast)
993
ValNodeAddPointer(&theInfo->query_slp, SEQLOC_WHOLE,
994
SeqIdSetDup(SeqIdFindBest(theInfo->query_bsp->id,
997
/* The last check of Bioseq - if length of sequence too small ? */
999
if(theInfo->query_bsp == NULL ||
1000
theInfo->query_bsp->length <= 0) {
1001
WWWBlastErrMessage(BLASTFastaToSE, NULL);
1005
/* This will prevent from incorrect formating in case when input
1006
sequence has valid SeqId, but in fact this SeqId do not correspond
1007
to the real sequence - XXX */
1009
if(!theInfo->believe_query)
1010
theInfo->fake_bsp = BlastMakeFakeBioseq(theInfo->query_bsp, NULL);
1012
theInfo->fake_bsp = theInfo->query_bsp;
1016
if (WWWGetValueByName(theInfo->info, "OVERVIEW") != NULL)
1017
theInfo->show_overview = TRUE;
1019
/* UNGAPPED_ALIGNMENT */
1021
if(WWWGetValueByName(theInfo->info, "UNGAPPED_ALIGNMENT") != NULL)
1024
if((options = BLASTOptionNew(theInfo->program, gapped_set)) == NULL) {
1025
WWWBlastErrMessage(BLASTErrOptions, NULL);
1030
options->is_megablast_search = TRUE;
1031
options->query_lcase_mask = query_lcase_mask;
1033
options->wordsize = 28;
1034
if((chptr = WWWGetValueByName(theInfo->info, "WORD_SIZE")) != NULL &&
1035
StringStr(chptr, "default") == NULL) {
1036
options->wordsize = atoi(chptr);
1038
options->gap_open = 0;
1039
options->gap_extend = 0;
1040
options->block_width = 0;
1041
/* Mega BLAST with no traceback (endpoints only)? */
1042
if (WWWGetValueByName(theInfo->info, "ENDPOINTS") != NULL)
1043
options->no_traceback = TRUE;
1044
if ((chptr = WWWGetValueByName(theInfo->info, "PERC_IDENT")) != NULL &&
1045
StringStr(chptr, "default") == NULL)
1046
options->perc_identity = (FloatLo) atof(chptr);
1049
theInfo->options = options;
1051
/* Set default values for matrix and gap parameters */
1052
BLASTOptionSetGapParams (options, NULL, 0, 0);
1054
/* Read MAT_PARAM if set */
1056
if(StringICmp("blastn", theInfo->program) &&
1057
(chptr = WWWGetValueByName(theInfo->info, "MAT_PARAM")) != NULL) {
1058
Char matrixname[64];
1059
Int4 opencost, extendedcost;
1060
/* Get matrix name and gap costs */
1061
if (chptr[1] != '-' || chptr[2] != '-') {
1062
sscanf(chptr, "%s\t %d\t %d", matrixname, &opencost,
1064
/* set the parameters */
1065
options->gap_open = opencost;
1066
options->gap_extend = extendedcost;
1067
if (options->matrix)
1068
MemFree(options->matrix);
1069
options->matrix = StringSave(matrixname);
1073
if((chptr = WWWGetValueByName(theInfo->info, "GAP_OPEN")) != NULL &&
1074
StringStr(chptr, "default") == NULL) {
1075
options->gap_open = atoi(chptr);
1078
if((chptr = WWWGetValueByName(theInfo->info, "GAP_EXTEND")) != NULL &&
1079
StringStr(chptr, "default") == NULL) {
1080
options->gap_extend = atoi(chptr);
1083
if((chptr = WWWGetValueByName(theInfo->info, "GAP_VALUES")) != NULL &&
1084
StringStr(chptr, "default") == NULL) {
1085
sscanf(chptr, "%d,%d", &options->gap_open, &options->gap_extend);
1088
if((chptr = WWWGetValueByName(theInfo->info, "X_DROPOFF")) != NULL &&
1089
StringStr(chptr, "default") == NULL) {
1090
options->gap_x_dropoff = atoi(chptr);
1093
if (!StringICmp(theInfo->program, "blastn")) {
1094
if (options->perc_identity > 98.0 || options->perc_identity <= 0) {
1095
options->reward = 1;
1096
options->penalty = -3;
1097
} else if (options->perc_identity > 95.0) {
1098
options->reward = 2;
1099
options->penalty = -5;
1100
} else if (options->perc_identity > 90.0) {
1101
options->reward = 1;
1102
options->penalty = -2;
1103
} else if (options->perc_identity > 85.0) {
1104
options->reward = 2;
1105
options->penalty = -3;
1106
} else if (options->perc_identity > 80.0) {
1107
options->reward = 3;
1108
options->penalty = -4;
1109
} else if (options->perc_identity > 75.0) {
1110
options->reward = 5;
1111
options->penalty = -6;
1113
options->reward = 1;
1114
options->penalty = -1;
1118
if((chptr = WWWGetValueByName(theInfo->info, "GAP_SIZE")) != NULL &&
1119
StringStr(chptr, "default") == NULL) {
1120
options->gap_size = atoi(chptr);
1123
if((chptr = WWWGetValueByName(theInfo->info,
1124
"WINDOW_SIZE")) != NULL &&
1125
StringStr(chptr, "default") == NULL) {
1126
options->window_size = atoi(chptr);
1129
/* For BLASTX we could set genetic code */
1131
if (!StringICmp(theInfo->program, "blastx")) {
1132
BioSourcePtr source;
1134
options->genetic_code = 1;
1136
if((chptr = WWWGetValueByName(theInfo->info,
1137
"GENETIC_CODE")) != NULL &&
1138
(StringStr(chptr, "default") == NULL)) {
1139
chptr = StringChr(chptr, '(');
1140
sscanf(chptr, "(%d", &value);
1142
options->genetic_code = value;
1144
source = BioSourceNew();
1145
source->org = OrgRefNew();
1146
source->org->orgname = OrgNameNew();
1147
source->org->orgname->gcode = options->genetic_code;
1148
ValNodeAddPointer(&theInfo->query_bsp->descr,
1149
Seq_descr_source, source);
1154
/* For BLASTX we could set genetic code */
1156
if (!StringICmp(theInfo->program, "blastx")) {
1157
if((chptr = WWWGetValueByName(theInfo->info, "OOF_ALIGN")) != NULL) {
1158
options->is_ooframe = TRUE;
1159
options->shift_pen = atoi(chptr);
1161
if(options->shift_pen == 0) {
1162
options->is_ooframe = FALSE;
1167
/* For TBLASTN and TBLASTX we could set DB_GENETIC_CODE */
1169
if (!StringICmp(theInfo->program, "tblastn") ||
1170
!(StringICmp(theInfo->program, "tblastx"))) {
1171
options->db_genetic_code = 1;
1172
if((chptr = WWWGetValueByName(theInfo->info,
1173
"DB_GENETIC_CODE")) != NULL &&
1174
(StringStr(chptr, "default") == NULL)) {
1175
chptr = StringChr(chptr, '(');
1176
sscanf(chptr, "(%d", &value);
1178
options->genetic_code = value;
1183
if((chptr = WWWGetValueByName(theInfo->info,
1184
"THRESHOLD_2")) != NULL &&
1185
(StringStr(chptr, "default") == NULL)) {
1186
options->threshold_second = atoi(chptr);
1189
if((chptr = WWWGetValueByName(theInfo->info,
1190
"REQUIRED_START")) != NULL &&
1191
StringStr(chptr, "default") != NULL) {
1192
options->required_start = atoi(chptr);
1195
if((chptr = WWWGetValueByName(theInfo->info,
1196
"REQUIRED_END")) != NULL &&
1197
StringStr(chptr, "default") != NULL) {
1198
options->required_end = atoi(chptr);
1201
if((chptr = WWWGetValueByName(theInfo->info,
1202
"DROPOFF_1")) != NULL &&
1203
(StringStr(chptr, "default") == NULL)) {
1204
options->dropoff_1st_pass = atof(chptr);
1207
if((chptr = WWWGetValueByName(theInfo->info,
1208
"CUTOFF")) != NULL &&
1209
(StringStr(chptr, "default") == NULL)) {
1210
options->cutoff_s = atof(chptr);
1214
if((chptr = WWWGetValueByName(theInfo->info,
1215
"DROPOFF_2")) != NULL &&
1216
(StringStr(chptr, "default") == NULL)) {
1217
options->dropoff_2nd_pass = atof(chptr);
1222
if((chptr = WWWGetValueByName(theInfo->info,
1223
"MATRIX")) != NULL &&
1224
(StringStr(chptr, "default") == NULL)) {
1225
options->matrix = StringSave(chptr);
1230
options->expect_value = DEFAULT_EXPECT;
1232
if((chptr = WWWGetValueByName(theInfo->info,
1233
"EXPECT")) != NULL &&
1234
StringStr(chptr, "default") == NULL) {
1235
options->expect_value = atof(chptr);
1238
/* NUMBER OF BITS: */
1240
if((chptr = WWWGetValueByName(theInfo->info,
1241
"NUM_OF_BITS")) != NULL &&
1242
(StringStr(chptr, "default") == NULL)) {
1243
options->number_of_bits = atof(chptr);
1246
/* Parameters for Smith-Waterman BLAST */
1248
/* TWEAK_PARAMETERS */
1250
if (WWWGetValueByName(theInfo->info, "TWEAK_PARAMETERS") != NULL)
1251
theInfo->options->tweak_parameters = TRUE;
1253
/* Adjustment of expect value and hitlist size */
1254
if (theInfo->options->tweak_parameters) {
1255
theInfo->options->hitlist_size *= 2; /*allows for extra matches*/
1256
theInfo->options->original_expect_value =
1257
theInfo->options->expect_value;
1258
if (theInfo->options->expect_value < 0.1) {
1259
theInfo->options->expect_value =
1260
MIN(0.1, 10 * theInfo->options->expect_value);
1264
/* SMITH_WATERMAN */
1266
if (WWWGetValueByName(theInfo->info, "SMITH_WATERMAN") != NULL)
1267
theInfo->options->smith_waterman = TRUE;
1271
if (WWWGetValueByName(theInfo->info, "RPSBLAST") != NULL)
1272
theInfo->options->is_rps_blast = TRUE;
1276
if ((chptr = WWWGetValueByName(theInfo->info, "ENTREZ_QUERY")) != NULL) {
1277
theInfo->options->entrez_query = StringSave(chptr);
1279
#if defined(NCBI_CLIENT_SERVER) || defined (NCBI_ENTREZ_CLIENT)
1283
theInfo->gi_list_total =
1284
BLASTGetUidsFromQuery(chptr, &uids, theInfo->db_is_na, FALSE);
1286
if(theInfo->gi_list_total > 0) {
1288
theInfo->gi_list = MemNew(theInfo->gi_list_total*sizeof(BlastDoubleInt4));
1289
for(i = 0; i < theInfo->gi_list_total; i++)
1290
theInfo->gi_list[i].gi = uids[i];
1295
/* ----------------------------------- */
1297
/* Number of CPU to use in BLAST Search: */
1299
if(theInfo->blast_config->num_cpu != 0)
1300
options->number_of_cpus = theInfo->blast_config->num_cpu;
1302
options->number_of_cpus = NUM_CPU_TO_USE;
1304
/* CPU time limit. */
1306
options->cpu_limit = DEFAULT_CPU_LIMIT;
1311
options->filter = FILTER_NONE;
1313
if(WWWGetMethod(theInfo->info) == WWW_GET ||
1314
(chptr = WWWGetValueByName(theInfo->info, "FSET")) != NULL) {
1315
if (!StringICmp(theInfo->program, "blastn")) {
1316
options->filter = FILTER_DUST;
1318
options->filter = FILTER_SEG;
1324
/* Filter settings */
1325
int i, num_entries = WWWGetNumEntries(theInfo->info);
1327
StringCpy(tmpbuf, "");
1329
for(i = 0; i < num_entries; i++) {
1330
if((chptr = WWWGetNameByIndex(theInfo->info, i)) != NULL &&
1331
!StringICmp(chptr, "FILTER")) {
1333
chptr = WWWGetValueByIndex(theInfo->info, i);
1334
/* add the filter */
1335
StringCat(tmpbuf, chptr);
1336
StringCat(tmpbuf, ";");
1339
options->filter_string = StringSave(tmpbuf);
1344
theInfo->number_of_descriptions = DEFAULT_DESCRIPTIONS;
1346
if((chptr = WWWGetValueByName(theInfo->info,
1347
"DESCRIPTIONS")) != NULL &&
1348
StringStr(chptr, "default") == NULL) {
1349
theInfo->number_of_descriptions = atoi(chptr);
1353
theInfo->number_of_alignments = DEFAULT_ALIGNMENTS;
1355
if((chptr = WWWGetValueByName(theInfo->info, "ALIGNMENTS")) != NULL &&
1356
StringStr(chptr, "default") == NULL) {
1357
theInfo->number_of_alignments = atoi(chptr);
1360
/* Now processing OTHER_ADVANCED_OPTIONS */
1362
if((chptr = WWWGetValueByName(theInfo->info,
1363
"OTHER_ADVANCED")) != NULL) {
1365
CharPtr ErrorMessage = NULL;
1366
CharPtr PNTR values = NULL;
1368
if(!ParseInputString(chptr, opt_str,
1369
&values, &ErrorMessage)) {
1371
WWWBlastErrMessage(BLASTOptionStr, ErrorMessage);
1375
/* -G gap open cost */
1377
index = GetLetterIndex(opt_str, 'G');
1378
if(values[index] != NULL) {
1379
options->gap_open = atoi(values[index]);
1382
/* -E gap extend cost */
1384
index = GetLetterIndex(opt_str, 'E');
1385
if(values[index] != NULL) {
1386
options->gap_extend = atoi(values[index]);
1389
/* -q penalty for nucleotide mismatch. */
1391
index = GetLetterIndex(opt_str, 'q');
1392
if(values[index] != NULL) {
1393
options->penalty = atoi(values[index]);
1396
/* -r reward for nucleotide match. */
1398
index = GetLetterIndex(opt_str, 'r');
1399
if(values[index] != NULL) {
1400
options->reward = atoi(values[index]);
1403
/* -e expect value. */
1405
index = GetLetterIndex(opt_str, 'e');
1406
if(values[index] != NULL) {
1407
options->expect_value = atof(values[index]);
1412
index = GetLetterIndex(opt_str, 'W');
1413
if(values[index] != NULL) {
1414
options->wordsize = atoi(values[index]);
1416
if (options->wordsize < 8)
1417
options->wordsize = 8;
1418
options->cutoff_s2 = options->wordsize*options->reward;
1419
options->wordsize += 4;
1423
/* -v Number of descriptions to print. */
1425
index = GetLetterIndex(opt_str, 'v');
1426
if(values[index] != NULL) {
1427
theInfo->number_of_descriptions = atoi(values[index]);
1430
/* -b Number of alignments to show. */
1432
index = GetLetterIndex(opt_str, 'b');
1433
if(values[index] != NULL) {
1434
theInfo->number_of_alignments = atoi(values[index]);
1437
/* -K Number of best hits from a region to keep. */
1439
index = GetLetterIndex(opt_str, 'K');
1440
if(values[index] != NULL) {
1441
options->hsp_range_max = atoi(values[index]);
1442
if (options->hsp_range_max != 0)
1443
options->perform_culling = TRUE;
1446
/* -L Number of best hits from a region to keep. */
1448
index = GetLetterIndex(opt_str, 'L');
1449
if(values[index] != NULL) {
1450
options->block_width = atoi(values[index]);
1453
/* -Y effective search space. */
1455
index = GetLetterIndex(opt_str, 'Y');
1456
if(values[index] != NULL) {
1457
options->searchsp_eff = atof(values[index]);
1463
options->hsp_range_max = 100*options->hitlist_size;
1464
options->block_width = theInfo->bsp->length;
1467
options->perform_culling = FALSE;
1470
/* Some values for PSI-Blast */
1472
if((chptr = WWWGetValueByName(theInfo->info, "STEP_NUMBER")) != NULL)
1473
value = atoi(chptr);
1475
sprintf(tmp, "PSI_BEGIN%d", value-1);
1477
if((chptr = WWWGetValueByName(theInfo->info, tmp)) != NULL)
1478
options->required_start = atoi(chptr) - 1;
1480
sprintf(tmp, "PSI_END%d", value-1);
1481
if((chptr = WWWGetValueByName(theInfo->info, tmp)) != NULL)
1482
options->required_end = atoi(chptr) - 1;
1484
if((chptr = WWWGetValueByName(theInfo->info, "E_THRESH")) != NULL)
1485
options->ethresh = atof(chptr);
1487
if((chptr = WWWGetValueByName(theInfo->info, "PHI_BLAST")) != NULL) {
1488
theInfo->is_phi_blast = TRUE;
1489
options->number_of_cpus = 1;
1492
/* ------------------------ */
1496
theInfo->options->hitlist_size = MAX(theInfo->number_of_descriptions,
1497
theInfo->number_of_alignments);
1499
/* ALIGNMENT VIEWS */
1501
if((chptr = WWWGetValueByName(theInfo->info,
1502
"ALIGNMENT_VIEW")) != NULL &&
1503
StringStr(chptr, "default") == NULL) {
1504
theInfo->align_view = atoi(chptr);
1506
if(theInfo->align_view == 12) {
1507
theInfo->xml_output = TRUE;
1511
if (WWWGetValueByName(theInfo->info, "XML_OUTPUT") != NULL)
1512
theInfo->xml_output = TRUE;
1514
if(options->is_ooframe) {
1515
theInfo->xml_output = FALSE;
1516
theInfo->align_view = 0;
1519
if (WWWGetValueByName(theInfo->info, "NCBI_GI") != NULL)
1520
theInfo->show_gi = TRUE;
1522
if (WWWGetValueByName(theInfo->info, "OVERVIEW") != NULL)
1523
theInfo->show_overview = TRUE;
1525
if (WWWGetValueByName(theInfo->info, "TAX_BLAST") != NULL)
1526
theInfo->show_tax_blast = TRUE;
1529
if((chptr = WWWGetValueByName(theInfo->info, "COLOR_SCHEMA")) != NULL &&
1530
StringStr(chptr, "No color schema") == NULL) {
1531
theInfo->color_schema = atoi(chptr);
1533
/* Now seting print and align options for BLAST output printing */
1535
theInfo->print_options = 0;
1536
theInfo->align_options = 0;
1537
theInfo->align_options += TXALIGN_COMPRESS;
1538
theInfo->align_options += TXALIGN_END_NUM;
1540
if (StringICmp("blastx", theInfo->program) == 0) {
1541
theInfo->align_options += TXALIGN_BLASTX_SPECIAL;
1544
if (theInfo->show_gi) {
1545
theInfo->align_options += TXALIGN_SHOW_GI;
1546
theInfo->print_options += TXALIGN_SHOW_GI;
1550
theInfo->print_options += TXALIGN_SHOW_NO_OF_SEGS;
1552
if (theInfo->align_view) {
1553
theInfo->align_options += TXALIGN_MASTER;
1554
if (theInfo->align_view == 1 || theInfo->align_view == 3)
1555
theInfo->align_options += TXALIGN_MISMATCH;
1556
if (theInfo->align_view == 3 || theInfo->align_view == 4 ||
1557
theInfo->align_view == 6)
1558
theInfo->align_options += TXALIGN_FLAT_INS;
1559
if (theInfo->align_view == 5 || theInfo->align_view == 6)
1560
theInfo->align_options += TXALIGN_BLUNT_END;
1562
theInfo->align_options += TXALIGN_MATRIX_VAL;
1563
theInfo->align_options += TXALIGN_SHOW_QS;
1566
/* Always HTML in WWW case */
1568
theInfo->align_options += TXALIGN_HTML;
1569
theInfo->print_options += TXALIGN_HTML;
1574
Boolean WWWValidateOptions(WWWBlastInfoPtr theInfo)
1576
ValNodePtr error_return=NULL;
1577
BlastErrorMsgPtr error_msg;
1580
if(theInfo == NULL || theInfo->options == NULL)
1583
if(BLASTOptionValidateEx(theInfo->options, theInfo->program,
1584
&error_return) != 0) {
1586
error_msg = (BlastErrorMsgPtr) error_return->data.ptrvalue;
1587
msg = error_msg->msg;
1589
WWWBlastErrMessage(BLASTErrOptions, msg);
1596
/* Used for PSI/PHI Blast searches */
1598
static Int1 S62ToInt(Uint1 ch)
1600
if(isupper(ch)) /* negative value */
1602
else if (isdigit(ch)) /* positive less than 10 */
1604
else if (!isupper(ch)) /* positive more or eq 10 */
1609
static Uint1 IntTo62S(Int1 value)
1613
else if (value < 10)
1615
else if (value < 36)
1620
static Int4 BLASTCharTo4bits(Char ch)
1624
if ((ch >= 'A') && (ch <= 'F'))
1625
return (((ch - 'A') + 10));
1626
else if ((ch >= '0') && (ch <= '9'))
1627
return ((ch - '0'));
1632
static Char BLAST4bitsToChar(int value)
1635
return (Char)(value + '0');
1637
return (Char)(value - 10 + 'A');
1640
static Nlm_FloatHi **BLASTDecodePosFreqs(CharPtr CHARPosFreqs,
1641
Int4 length, Int4 size)
1643
Nlm_FloatHi **posFreqs;
1644
register Int4 i, j, k = 0, l;
1648
if(CHARPosFreqs == NULL || CHARPosFreqs[0] == NULLB)
1651
posFreqs = (Nlm_FloatHi **) MemNew(sizeof(Nlm_FloatHi *)*(length+1));
1653
for(i = 0; i <= length; i++)
1654
posFreqs[i] = (Nlm_FloatHi *) MemNew(sizeof(Nlm_FloatHi)*size);
1656
for(i = 0, k = 0; i <= length; i++) {
1657
for(j =0; j < size; j++) {
1658
for(l = 0; l < 8; l++, k++) {
1659
ivalue += (BLASTCharTo4bits(CHARPosFreqs[k]) << (l * 4));
1660
/* ivalue = ivalue << 4; */
1663
MemCpy(&fvalue, &ivalue, 4);
1664
posFreqs[i][j] = (Nlm_FloatHi) fvalue; /* 4 bytes into 8 bytes */
1671
static Int4Ptr PNTR Decode62Matrix(CharPtr Matrix62, Int4 length, Int4 size)
1673
Int4Ptr PNTR posMatrix;
1674
register Int4 i, j, k = 0;
1676
if(Matrix62 == NULL || Matrix62[0] == NULLB)
1679
posMatrix = (Int4Ptr PNTR) MemNew(sizeof(Int4Ptr)*(length+1));
1681
for(i = 0; i <= length; i++)
1682
posMatrix[i] = (Int4Ptr) MemNew(sizeof(Int4Ptr)*size);
1684
for(i = 0; i <= length; i++) {
1685
for(j =0; j < size; j++) {
1686
if(Matrix62[k] == 'z')
1687
posMatrix[i][j] = BLAST_SCORE_MIN;
1688
else if (Matrix62[k] == 'Z')
1689
posMatrix[i][j] = BLAST_SCORE_MAX;
1691
posMatrix[i][j]= S62ToInt(Matrix62[k]);
1697
static CharPtr BLASTEncodePosFreqs(Nlm_FloatHi **posFreqs,
1698
Int4 length, Int4 size)
1700
Int4 i, j, k=0, ivalue, fmask, l;
1701
CharPtr CHARPosFreqs;
1705
/* So... size of the buffer will be eq. to number of elements
1706
in the posFreqs matrix times 8: 2 characters for every byte */
1707
CHARPosFreqs = (CharPtr) MemNew((length+1)*(size+1)*sizeof(Nlm_FloatLo)*2);
1709
for(i = 0, k = 0; i <= length; i++) {
1710
for(j =0; j < size; j++) {
1711
fvalue = (Nlm_FloatLo) posFreqs[i][j]; /* truncation to 4 bytes */
1712
fmask = 0xF; /* 4 bits */
1716
MemCpy(&ivalue, &fvalue, 4);
1717
for(l = 0; l < 8; l++, k++) {
1718
CHARPosFreqs[k] = BLAST4bitsToChar((ivalue & fmask) >> l * 4);
1724
return CHARPosFreqs;
1726
static CharPtr Encode62Matrix(Int4Ptr PNTR posMatrix, Int4 length, Int4 size)
1728
register Int4 i, j, k=0;
1731
Matrix62 = (CharPtr) MemNew((length+1)*size+1);
1733
for(i = 0; i <= length; i++) {
1734
for(j =0; j < size; j++) {
1736
if(posMatrix[i][j] < -26)
1738
else if (posMatrix[i][j] > 35)
1741
Matrix62[k] = IntTo62S(posMatrix[i][j]);
1749
void BLASTPrintDataFree(BLASTPrintDataPtr data)
1751
GIListPtr glp, glp_next;
1757
TxDfDbInfoDestruct(data->dbinfo);
1758
MemFree(data->ka_params);
1759
MemFree(data->ka_params_gap);
1760
MemFree(data->buffer);
1761
ValNodeFreeData(data->info_vnp);
1763
if(data->psidata != NULL) {
1764
MemFree(data->psidata->matrix62);
1765
MemFree(data->psidata->CHARPosFreqs);
1767
for(glp = data->psidata->PrevCheckedGIs; glp != NULL; glp = glp_next) {
1768
glp_next = glp->next;
1772
for(glp = data->psidata->PrevGoodGIs; glp != NULL; glp = glp_next) {
1773
glp_next = glp->next;
1776
MemFree(data->psidata);
1779
if(data->seqalign != NULL)
1780
SeqAlignSetFree(data->seqalign);
1782
SeqLocFree(data->seqloc);
1784
for(vnp = data->vnp; vnp != NULL; vnp=vnp->next) {
1785
SeqAlignSetFree((SeqAlignPtr) vnp->data.ptrvalue);
1788
ValNodeFree(data->vnp);
1794
/* We got seqalign list, now divide it into two lists:
1795
the first one will contain alignments with Evalue
1796
better than threshold, other worse than threshold
1800
SplitSeqAlign(SeqAlignPtr seqalign, SeqAlignPtr *GoodSeqAlignment_ptr,
1801
SeqAlignPtr *BadSeqAlignment_ptr, SeqAlignPtr *lastGood_ptr,
1802
Int2Ptr *marks_ptr, Int2Ptr countBad_ptr,
1803
Int2Ptr countGood_ptr, Nlm_FloatHi ethresh_old)
1807
SeqIdPtr last_id, subject_id;
1808
SeqAlignPtr gsl, seqalign_var, last_seqalign;
1809
SeqAlignPtr BadSeqAlignments, GoodSeqAlignments, lastGood = NULL;
1810
Nlm_FloatHi bit_score, evalue;
1812
Int2 countGood, countBad;
1817
GoodSeqAlignments = seqalign_var = seqalign;
1819
BadSeqAlignments = NULL;
1820
while (seqalign_var) {
1821
subject_id = SeqIdDup(TxGetSubjectIdFromSeqAlign(seqalign_var));
1822
if (last_id == NULL || SeqIdComp(subject_id, last_id) == SIC_NO) {
1823
SeqIdSetFree(last_id);
1824
last_id = subject_id;
1825
GetScoreAndEvalue(seqalign_var, &score, &bit_score, &evalue, &number);
1826
if (evalue > ethresh_old) {
1827
if (first_time == TRUE) {
1828
GoodSeqAlignments = NULL;
1830
last_seqalign = NULL; /* split the good and bad lists. */
1832
lastGood = last_seqalign;
1833
last_seqalign->next = NULL; /* split the good and bad lists. */
1835
BadSeqAlignments = seqalign_var;
1839
SeqIdSetFree(subject_id);
1843
last_seqalign = seqalign_var;
1844
seqalign_var = seqalign_var->next;
1847
/* count number of good and bad alignments */
1848
for (gsl = GoodSeqAlignments, countGood = 0; gsl; gsl = gsl->next,
1850
for (gsl = BadSeqAlignments, countBad = 0; gsl; gsl = gsl->next,
1853
if (countGood + countBad)
1854
/* allocate memo for marks array */
1855
marks = (Int2Ptr) MemNew(sizeof(Int2) * (countGood + countBad));
1859
*GoodSeqAlignment_ptr = GoodSeqAlignments;
1860
*BadSeqAlignment_ptr = BadSeqAlignments;
1861
*lastGood_ptr = lastGood;
1863
*countBad_ptr = countBad;
1864
*countGood_ptr = countGood;
1869
Boolean TestSTDOut(void)
1871
if(write(1, "", 0) < 0) {
1877
static int LIBCALLBACK WWWTickCallback(Int4 sequence_number,
1878
Int4 number_of_positive_hits)
1884
/* fprintf(stdout, "."); */
1886
printf("<!-- Progress msg from the server %d %d-->\n",
1887
sequence_number, number_of_positive_hits);
1894
static void PrintRequestHeader(WWWBlastInfoPtr theInfo)
1896
Char TimeNowStr[30], TimeModStr[30];
1901
printf("<TITLE>BLAST Search Results </TITLE>\n");
1902
printf("</HEAD>\n");
1903
printf("<BODY BGCOLOR=\"#FFFFFF\" LINK=\"#0000FF\" "
1904
"VLINK=\"#660099\" ALINK=\"#660099\">\n");
1905
printf("<A HREF=\"%s/blast_form.map\">"
1906
"<IMG SRC=\"%s/images/psi_blast.gif\" "
1907
"BORDER=0 ISMAP></A>\n",
1908
theInfo->www_root_path == NULL? "/blast" : theInfo->www_root_path,
1909
theInfo->www_root_path == NULL? "/blast" : theInfo->www_root_path);
1910
printf("<BR><BR><PRE>\n");
1914
BlastPrintVersionInfo(theInfo->program, TRUE, stdout);
1915
fprintf(stdout, "\n");
1917
if (theInfo->is_phi_blast) {
1918
BlastPrintPhiReference(TRUE, 90, stdout);
1920
BlastPrintReference(TRUE, 90, stdout);
1923
fprintf(stdout, "\n");
1925
AcknowledgeBlastQuery(theInfo->query_bsp, 70,
1926
stdout, theInfo->believe_query, TRUE);
1929
dbinfo = GetDbInfoFromReadDb(search_data->database,
1930
!search_data->db_is_na);
1931
PrintDbInformation(dbinfo, 70, search_data->outfp,
1935
PrintDbInformation(theInfo->database, !theInfo->db_is_na, 70,
1939
fprintf(stdout, "Searching please wait...");
1946
static ValNodePtr seed_core_private (BlastSearchBlkPtr search, CharPtr program_name, BLAST_OptionsBlkPtr options, SeqLocPtr *seqloc_ptr, CharPtr patfile, CharPtr pattern, Uint1Ptr query, Uint1Ptr unfilter_query, Int4 queryLength, Boolean show_diagnostics, Nlm_FloatHi *paramC, ValNodePtr PNTR info_vnp)
1949
Boolean tmp_file_made = FALSE;
1950
Char buffer[PATH_MAX];
1952
Int4 hitlist_count, hspcnt, index, index1;
1953
Int4 number_of_descriptions, number_of_alignments;
1955
posSearchItems *posSearch;
1957
SeqIdPtr subject_id;
1958
SeqLocPtr next, seqloc;
1961
seedSearchItems *seedSearch;
1963
Boolean is_dna = FALSE; /*cannot use DNA queries in blastpgp*/
1964
Int4 i; /*index over characters*/
1971
program_flag = convertProgramToFlag(program_name, &is_dna);
1973
if (options->isPatternSearch) {
1975
/* open and fill a temporary file if there's a pattern. XXX */
1977
patfp = FileOpen(buffer, "w");
1978
fprintf(patfp, "ID \n");
1979
fprintf(patfp, "PA %s\n", pattern);
1983
tmp_file_made = TRUE;
1986
if (patfile) /* If a file was give, use it. */
1987
StringCpy(buffer, patfile);
1988
if ((patfp = FileOpen(buffer, "r")) == NULL) {
1989
ErrPostEx(SEV_FATAL, 0, 0, "blast: Unable to open pattern file %s\n", buffer);
1992
seedSearch = (seedSearchItems *) MemNew(sizeof(seedSearchItems));
1993
fillCandLambda(seedSearch, options->matrix, options);
1995
ErrPostEx(SEV_FATAL, 0, 0, "Must be a pattern search");
2000
*paramC = seedSearch->paramC;
2002
search->gap_align = GapAlignBlkNew(1,1);
2003
search->gap_align->gap_open = options->gap_open;
2004
search->gap_align->gap_extend = options->gap_extend;
2005
search->gap_align->decline_align = (-(BLAST_SCORE_MIN));
2006
search->gap_align->x_parameter = options->gap_x_dropoff
2007
*NCBIMATH_LN2/seedSearch->paramLambda;
2008
search->gap_align->matrix = search->sbp->matrix;
2009
initProbs(seedSearch);
2010
init_order(search->gap_align->matrix,program_flag,is_dna,seedSearch);
2012
for(i = 0; i < queryLength; i++)
2013
query[i] = seedSearch->order[query[i]];
2014
if (unfilter_query) {
2015
for(i = 0; i < queryLength; i++)
2016
unfilter_query[i] = seedSearch->order[unfilter_query[i]];
2020
posSearch = (posSearchItems *) MemNew(sizeof(posSearchItems));
2022
vnp = seedEngineCore(search, options, query, unfilter_query, readdb_get_filename(search->rdfp), patfile, program_flag, patfp, is_dna, FALSE, seedSearch, options->ethresh, 0.0, posSearch, &seqloc, show_diagnostics, info_vnp);
2024
if (tmp_file_made) /* Remove temporary pattern file if it exists. */
2027
MemFree(seedSearch);
2029
MemFree(posSearch->posResultSequences);
2032
*seqloc_ptr = seqloc;
2037
static BLAST_ScorePtr PNTR GetPSIMatrix(PSIDataPtr psidata, WWWBlastInfoPtr theInfo, Nlm_FloatHi *karlinK_out, Nlm_FloatHi ***posFreqs_out) {
2039
CharPtr chptr, Matrix62_last=NULL, pattern;
2041
Nlm_FloatHi karlinK;
2044
Int4 queryLength; /*length of query sequence*/
2046
SeqAlignPtr seqalign;
2047
SeqAlignPtr *lastSeqAligns=NULL;
2048
SeqLocPtr seg_slp; /*pointer to structure for seg filtering*/
2049
Uint1Ptr query = NULL; /*query sequence read in*/
2050
Uint1Ptr unfilter_query = NULL; /*needed if seg will filter query*/
2051
ValNodePtr info_vnp, phi_vnp = NULL;
2052
SeqLocPtr phi_seqloc = NULL;
2054
SeqIdPtr sip, all_sip = NULL;
2056
BlastSearchBlkPtr search;
2057
BLAST_ScorePtr PNTR posMatrix = NULL;
2058
compactSearchItems *compactSearch = NULL;
2059
posSearchItems *posSearch;
2060
CharPtr CHARPosFreqs_last;
2061
Nlm_FloatHi **posFreqs;
2063
/* first step; return NULL, means to use default matrix */
2064
if (psidata->StepNumber == 0)
2067
/* The second step; means, that there is list of GI's to
2069
using default or read matrix and list of seqaligns
2070
from limited search; limited search -
2071
search the query not in whole database but just
2072
in subset specified byt the list of GI's
2075
/* Create list of SeqIdPtr for the limited search */
2077
num_entries = WWWGetNumEntries(theInfo->info);
2079
for(i = 0; i < num_entries; i++) {
2080
GIListPtr good_gil, checked_gil;
2082
if((chptr = WWWGetNameByIndex(theInfo->info, i)) != NULL &&
2083
!StringICmp(chptr, "checked_GI")) {
2085
if((chptr = WWWGetValueByIndex(theInfo->info, i)) != NULL) {
2087
sip = ValNodeAddInt(NULL, SEQID_GI, gi);
2088
ValNodeLink(&all_sip, sip);
2090
/* Create list of GI's, it will be used when we
2093
if (!psidata->PrevCheckedGIs) {
2095
checked_gil = (GIListPtr) MemNew(sizeof(GIList));
2096
checked_gil->gi = gi;
2097
checked_gil->next = NULL;
2098
psidata->PrevCheckedGIs = checked_gil;
2100
checked_gil->next = (GIListPtr) MemNew(sizeof(GIList));
2101
checked_gil = checked_gil->next;
2102
checked_gil->gi = gi;
2103
checked_gil->next = NULL;
2106
} else if((chptr = WWWGetNameByIndex(theInfo->info, i)) != NULL &&
2107
!StringICmp(chptr, "good_GI")) {
2109
if((chptr = WWWGetValueByIndex(theInfo->info, i)) != NULL) {
2112
if (!psidata->PrevGoodGIs) {
2114
good_gil = (GIListPtr) MemNew(sizeof(GIList));
2116
good_gil->next = NULL;
2117
psidata->PrevGoodGIs = good_gil;
2119
good_gil->next = (GIListPtr) MemNew(sizeof(GIList));
2120
good_gil = good_gil->next;
2122
good_gil->next = NULL;
2128
/* So this search will be limited to the list of gis */
2129
theInfo->options->gilist = all_sip;
2131
/* Some additional parameters required for the matrix recalculation */
2133
theInfo->options->use_best_align = TRUE;
2134
theInfo->options->use_real_db_size = TRUE;
2138
if((search = BLASTSetUpSearchWithReadDb(theInfo->fake_bsp, theInfo->program, theInfo->query_bsp->length, theInfo->database, theInfo->options, WWWTickCallback )) == NULL) {
2142
search->positionBased = FALSE;
2144
if (psidata->StepNumber > 1) {
2145
/* The third and rest of steps; means, that
2146
before we are recalculating matrix, we
2147
should read old matrix from the previous step;
2150
/* Read matrix from the previous step */
2152
Matrix62_last = WWWGetValueByName(theInfo->info, "PSI_MATRIX");
2154
/* Decode read matrix */
2156
if(Matrix62_last != NULL && Matrix62_last[0] != NULLB) {
2157
search->positionBased = TRUE;
2158
search->sbp->posMatrix = Decode62Matrix (Matrix62_last, search->context[0].query->length, search->sbp->alphabet_size);
2161
CHARPosFreqs_last = WWWGetValueByName(theInfo->info, "POS_FREQS");
2163
if(CHARPosFreqs_last != NULL && CHARPosFreqs_last[0] != NULLB) {
2164
search->positionBased = TRUE;
2165
search->sbp->posFreqs = BLASTDecodePosFreqs(CHARPosFreqs_last, search->context[0].query->length, search->sbp->alphabet_size);
2170
fd = FileOpen("/tmp/new_freqs.float", "w");
2171
for(i = 0; i <= search->context[0].query->length; i++) {
2172
for(j =0; j < search->sbp->alphabet_size; j++) {
2173
fprintf(fd, "%f ", search->sbp->posFreqs[i][j]);
2183
if((chptr = WWWGetValueByName(theInfo->info,
2184
"PSI_KARLIN_K")) != NULL) {
2185
karlinK = atof(chptr);
2186
search->sbp->kbp_gap_psi[0]->K = karlinK;
2187
search->sbp->kbp_gap_psi[0]->logK = log(karlinK);
2190
} /* end reread the matrix */
2192
search->thr_info->tick_callback = NULL;
2194
pattern = WWWGetValueByName(theInfo->info, "PHI_PATTERN");
2196
/* If pattern is non-NULL, then it is a PHI-BLAST search. */
2198
query = BlastGetSequenceFromBioseq(theInfo->fake_bsp, &queryLength);
2199
seg_slp = BlastBioseqFilter(theInfo->fake_bsp, theInfo->options->filter_string);
2200
unfilter_query = NULL;
2202
unfilter_query = MemNew((queryLength + 1) * sizeof(Uint1));
2203
for (i = 0; i < queryLength; i++)
2204
unfilter_query[i] = query[i];
2205
BlastMaskTheResidues(query,queryLength,21,seg_slp,FALSE, 0);
2208
theInfo->options->isPatternSearch = TRUE;
2209
phi_vnp = seed_core_private(search, "patseedp", theInfo->options, &phi_seqloc, NULL, pattern, query, unfilter_query, queryLength, FALSE, NULL, &info_vnp);
2210
ValNodeFreeData(info_vnp);
2213
MemFree(unfilter_query);
2215
seqalign = convertValNodeListToSeqAlignList(phi_vnp, &lastSeqAligns, &numSeqAligns);
2216
ValNodeFree(phi_vnp);
2218
seqalign = BioseqBlastEngineCore(search, theInfo->options,
2219
search->sbp->posMatrix);
2222
if(search->sbp->posMatrix != NULL) {
2223
for(i = 0; i <= theInfo->fake_bsp->length; i++) {
2224
MemFree(search->sbp->posMatrix[i]);
2226
MemFree(search->sbp->posMatrix);
2227
search->sbp->posMatrix = NULL;
2230
/* Now finaly calculating matrix that will be used at this step */
2234
ReadDBBioseqFetchEnable("psiblast", theInfo->database,
2236
compactSearch = compactSearchNew(compactSearch);
2237
copySearchItems(compactSearch, search, theInfo->options->matrix);
2239
compactSearch->pseudoCountConst = 7;
2241
if (search->sbp->posFreqs == NULL) {
2242
search->sbp->posFreqs = allocatePosFreqs(compactSearch->qlength,
2243
compactSearch->alphabetSize);
2246
posMatrix = WposComputation(compactSearch, seqalign,
2247
search->sbp->posFreqs);
2249
/* We have to return posFreqs to the upper layer */
2250
posFreqs = allocatePosFreqs(compactSearch->qlength,
2251
compactSearch->alphabetSize);
2252
copyPosFreqs(search->sbp->posFreqs, posFreqs, compactSearch->qlength,
2253
compactSearch->alphabetSize);
2255
MemFree(compactSearch->standardProb);
2256
MemFree(compactSearch);
2258
ReadDBBioseqFetchDisable();
2260
/* Encode matrix for the use in the next step*/
2262
psidata->matrix62 = Encode62Matrix(posMatrix,
2263
search->context[0].query->length,
2264
search->sbp->alphabet_size);
2266
psidata->CHARPosFreqs = BLASTEncodePosFreqs(posFreqs, search->context[0].query->length, search->sbp->alphabet_size);
2271
fd = FileOpen("/tmp/old_freqs.float", "w");
2272
for(i = 0; i <= search->context[0].query->length; i++) {
2273
for(j =0; j < search->sbp->alphabet_size; j++) {
2274
fprintf(fd, "%f ", posFreqs[i][j]);
2279
fd = FileOpen("/tmp/old_freqs.buffer", "w");
2280
fprintf(fd, "%s", psidata->CHARPosFreqs);
2286
*karlinK_out = search->sbp->kbp_gap_psi[0]->K;
2287
*posFreqs_out = posFreqs;
2289
SeqAlignSetFree(seqalign);
2290
SeqLocFree(phi_seqloc);
2292
search = BlastSearchBlkDestruct(search);
2294
theInfo->options->use_best_align = FALSE;
2295
theInfo->options->use_real_db_size = FALSE;
2297
SeqIdSetFree(theInfo->options->gilist);
2298
theInfo->options->gilist = NULL;
2299
theInfo->options->isPatternSearch = FALSE;
2302
} /* end of GetPSIMatrix() */
2304
BLASTPrintDataPtr PSIBlastSearch(WWWBlastInfoPtr theInfo)
2306
BLASTPrintDataPtr print_data;
2307
ValNodePtr vnp, other_returns= NULL;
2308
Int4 i, num_entries;
2311
Char matrixname[64] = "BLOSUM62";
2312
Int4 opencost = 0, extendedcost = 0;
2313
BlastSearchBlkPtr search;
2314
BLAST_ScorePtr PNTR posMatrix = NULL;
2315
Nlm_FloatHi karlinK;
2316
BLAST_MatrixPtr matrix = NULL;
2317
Nlm_FloatHi **posFreqs;
2322
PrintRequestHeader(theInfo);
2323
print_data = (BLASTPrintDataPtr) MemNew(sizeof(BLASTPrintData));
2325
psidata = MemNew(sizeof(PSIData));
2326
psidata->PrevGoodGIs = NULL;
2327
psidata->PrevCheckedGIs = NULL;
2329
/* initialize the search */
2330
theInfo->options->pseudoCountConst = 7;
2332
if((search = BLASTSetUpSearchWithReadDb(theInfo->fake_bsp, theInfo->program, theInfo->query_bsp->length, theInfo->database, theInfo->options, WWWTickCallback )) == NULL) {
2336
/* Matrix and StepNumber for PSI-Blast */
2338
if((chptr = WWWGetValueByName(theInfo->info, "STEP_NUMBER")) != NULL)
2339
psidata->StepNumber = atoi(chptr);
2341
if((posMatrix = GetPSIMatrix(psidata, theInfo,
2342
&karlinK, &posFreqs)) != NULL) {
2343
search->positionBased = TRUE;
2344
search->sbp->kbp_gap_psi[0]->K = karlinK;
2345
search->sbp->kbp_gap_psi[0]->logK = log(karlinK);
2346
search->sbp->posFreqs = posFreqs;
2349
search->sbp->posMatrix = posMatrix;
2351
search->thr_info->tick_callback = WWWTickCallback;
2353
ReadDBBioseqFetchEnable("psiblast", theInfo->database,
2356
print_data->seqalign = BioseqBlastEngineCore(search, theInfo->options, posMatrix);
2358
ReadDBBioseqFetchDisable();
2363
ReadDBBioseqFetchEnable("psiblast", theInfo->database,
2365
compactSearch = compactSearchNew(compactSearch);
2366
copySearchItems(compactSearch, search, theInfo->options->matrix);
2368
compactSearch->pseudoCountConst = 7;
2370
if (search->sbp->posFreqs == NULL) {
2371
search->sbp->posFreqs = allocatePosFreqs(compactSearch->qlength,
2372
compactSearch->alphabetSize);
2375
posMatrix = WposComputation(compactSearch, seqalign,
2376
search->sbp->posFreqs);
2378
MemFree(compactSearch->standardProb);
2379
MemFree(compactSearch);
2382
/* Encode matrix for the use in the next step*/
2386
if(posMatrix != NULL) {
2387
for(i = 0; i <= theInfo->fake_bsp->length; i++) {
2388
MemFree(posMatrix[i]);
2392
search->sbp->posMatrix = NULL;
2395
print_data->psidata = psidata;
2399
other_returns = BlastOtherReturnsPrepare(search);
2401
print_data->mask_loc = NULL;
2403
for (vnp=other_returns; vnp; vnp = vnp->next) {
2404
switch (vnp->choice) {
2406
print_data->dbinfo = vnp->data.ptrvalue;
2409
print_data->ka_params =
2410
(BLAST_KarlinBlkPtr) vnp->data.ptrvalue;
2413
print_data->ka_params_gap =
2414
(BLAST_KarlinBlkPtr) vnp->data.ptrvalue;
2415
psidata->karlinK = print_data->ka_params_gap->K;
2418
print_data->buffer = vnp->data.ptrvalue;
2421
print_data->matrix = (BLAST_MatrixPtr) vnp->data.ptrvalue;
2422
/*BLAST_MatrixDestruct(matrix);*/
2423
vnp->data.ptrvalue = NULL;
2425
case SEQLOC_MASKING_NOTSET:
2426
case SEQLOC_MASKING_PLUS1:
2427
case SEQLOC_MASKING_PLUS2:
2428
case SEQLOC_MASKING_PLUS3:
2429
case SEQLOC_MASKING_MINUS1:
2430
case SEQLOC_MASKING_MINUS2:
2431
case SEQLOC_MASKING_MINUS3:
2432
ValNodeAddPointer(&(print_data->mask_loc), vnp->choice, vnp->data.ptrvalue);
2439
ValNodeFree(other_returns);
2441
search = BlastSearchBlkDestruct(search);
2446
BLASTPrintDataPtr PHIBlastSearch(WWWBlastInfoPtr theInfo)
2448
BLASTPrintDataPtr print_data;
2449
ValNodePtr vnp, other_returns= NULL;
2452
Int4 i, num_entries;
2453
Int4 queryLength; /*length of query sequence*/
2455
Char matrixname[64] = "BLOSUM62";
2456
Int4 opencost = 0, extendedcost = 0;
2457
SeqLocPtr seqloc=NULL;
2458
SeqLocPtr seg_slp; /*pointer to structure for seg filtering*/
2459
Uint1Ptr query = NULL; /*query sequence read in*/
2460
Uint1Ptr unfilter_query = NULL; /*needed if seg will filter query*/
2461
ValNodePtr info_vnp;
2462
BlastSearchBlkPtr search;
2463
BLAST_MatrixPtr matrix = NULL;
2468
PrintRequestHeader(theInfo);
2469
print_data = (BLASTPrintDataPtr) MemNew(sizeof(BLASTPrintData));
2471
psidata = MemNew(sizeof(PSIData));
2472
psidata->PrevGoodGIs = NULL;
2473
psidata->PrevCheckedGIs = NULL;
2475
print_data->psidata = psidata;
2477
/* Get matrix name and gap costs */
2479
if((chptr = WWWGetValueByName(theInfo->info, "MAT_PARAM")) != NULL) {
2480
if (chptr[1] != '-' || chptr[2] != '-')
2481
sscanf(chptr, "%s\t %d\t %d", matrixname, &opencost, &extendedcost);
2484
/* Change matrix parameters */
2486
BLASTOptionSetGapParams (theInfo->options, matrixname,
2487
opencost, extendedcost);
2489
chptr = WWWGetValueByName(theInfo->info, "PHI_PATTERN");
2491
/* Reguilar PHI-Blast search */
2492
theInfo->options->isPatternSearch = TRUE;
2494
if((search = BLASTSetUpSearchWithReadDb(theInfo->fake_bsp, "blastp", theInfo->query_bsp->length, theInfo->database, theInfo->options, WWWTickCallback)) == NULL) {
2498
query = BlastGetSequenceFromBioseq(theInfo->fake_bsp, &queryLength);
2499
seg_slp = BlastBioseqFilter(theInfo->fake_bsp,
2500
theInfo->options->filter_string);
2502
unfilter_query = NULL;
2504
unfilter_query = MemNew((queryLength + 1) * sizeof(Uint1));
2505
for (i = 0; i < queryLength; i++)
2506
unfilter_query[i] = query[i];
2507
BlastMaskTheResidues(query,queryLength,21,seg_slp,FALSE, 0);
2510
print_data->vnp = seed_core_private(search, "patseedp", theInfo->options, &(print_data->seqloc), NULL, chptr, query, unfilter_query, queryLength, TRUE, ¶mC, &info_vnp);
2512
print_data->info_vnp = info_vnp;
2515
MemFree(unfilter_query);
2519
other_returns = BlastOtherReturnsPrepare(search);
2520
print_data->mask_loc = NULL;
2521
for (vnp=other_returns; vnp; vnp = vnp->next) {
2522
switch (vnp->choice) {
2524
print_data->dbinfo = vnp->data.ptrvalue;
2527
print_data->ka_params_gap = vnp->data.ptrvalue;
2528
/* print_data->ka_params_gap->paramC = paramC; ?? */
2531
print_data->ka_params = vnp->data.ptrvalue;
2534
print_data->buffer = vnp->data.ptrvalue;
2537
print_data->matrix = (BLAST_MatrixPtr) vnp->data.ptrvalue;
2538
/*BLAST_MatrixDestruct(matrix);*/
2539
vnp->data.ptrvalue = NULL;
2541
case SEQLOC_MASKING_NOTSET:
2542
case SEQLOC_MASKING_PLUS1:
2543
case SEQLOC_MASKING_PLUS2:
2544
case SEQLOC_MASKING_PLUS3:
2545
case SEQLOC_MASKING_MINUS1:
2546
case SEQLOC_MASKING_MINUS2:
2547
case SEQLOC_MASKING_MINUS3:
2548
ValNodeAddPointer(&(print_data->mask_loc), vnp->choice, vnp->data.ptrvalue);
2555
search = BlastSearchBlkDestruct(search);
2561
static void printSubmitButton(FILE* fp, Int4 step)
2563
fprintf(fp, "<INPUT TYPE=\"submit\" NAME=\"NEXT_I\" "
2564
"VALUE=\"Run PSI-Blast iteration %d\">\n",
2568
static Int4 get_number_alignment(SeqAlignPtr align)
2574
align = align->next;
2580
Boolean PHIPrintOutput(WWWBlastInfoPtr theInfo,
2581
BLASTPrintDataPtr print_data,
2582
ValNodePtr vnp, Nlm_FloatHi ethresh_old)
2584
Uint4 align_options, print_options;
2585
SeqAnnotPtr seqannot;
2586
BlastTimeKeeper time_keeper;
2587
BlastPruneSapStructPtr prune;
2588
Uint1 f_order[FEATDEF_ANY], g_order[FEATDEF_ANY];
2589
Char hostname[30], buffer[32];
2592
Char f_name[64], title[1024];
2594
Int2 count, countBad, countGood;
2597
SeqAlignPtr seqalign;
2598
SeqAlignPtr *lastSeqAligns=NULL;
2599
SeqAlignPtr lastGood, BadSeqAlignments, GoodSeqAlignments;
2602
Int4Ptr PNTR txmatrix;
2604
MemSet((Pointer)(g_order), 0, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
2605
MemSet((Pointer)(f_order), 0, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
2607
if(print_data == NULL) {
2608
WWWBlastErrMessage(BLASTMiscError, NULL);
2615
align_options += TXALIGN_COMPRESS;
2616
align_options += TXALIGN_END_NUM;
2618
if (theInfo->show_gi) {
2619
align_options += TXALIGN_SHOW_GI;
2620
print_options += TXALIGN_SHOW_GI;
2623
if (theInfo->options->gapped_calculation == FALSE)
2624
print_options += TXALIGN_SHOW_NO_OF_SEGS;
2627
if (theInfo->align_view) {
2628
align_options += TXALIGN_MASTER;
2630
if (theInfo->align_view == 1 || theInfo->align_view == 3)
2631
align_options += TXALIGN_MISMATCH;
2633
if (theInfo->align_view == 3 || theInfo->align_view == 4 ||
2634
theInfo->align_view == 6)
2635
align_options += TXALIGN_FLAT_INS;
2637
if (theInfo->align_view == 5 || theInfo->align_view == 6)
2638
align_options += TXALIGN_BLUNT_END;
2640
align_options += TXALIGN_MATRIX_VAL;
2641
align_options += TXALIGN_SHOW_QS;
2644
/* align_options += TXALIGN_MATRIX_VAL;
2645
align_options += TXALIGN_SHOW_QS; */
2647
align_options += TXALIGN_HTML;
2648
print_options += TXALIGN_HTML;
2650
ReadDBBioseqFetchEnable ("phiblast",
2651
theInfo->database, theInfo->db_is_na, TRUE);
2653
seqannot = SeqAnnotNew();
2655
AddAlignInfoToSeqAnnot(seqannot, theInfo->align_type);
2659
/* gethostname(hostname, sizeof(hostname)); */
2661
sprintf(href, "%s/nph-viewgif.cgi?", theInfo->www_root_path == NULL? "/blast" : theInfo->www_root_path);
2663
seqalign = convertValNodeListToSeqAlignList(print_data->vnp, &lastSeqAligns, &numSeqAligns);
2664
seqannot->data = seqalign;
2665
if (theInfo->show_overview) {
2666
sprintf(f_name, "%ld%ld.gif", (long)random(), (long)getpid());
2667
align_num = get_number_alignment((SeqAlignPtr)(seqannot->data));
2668
sprintf(title, "<H3><a href=\"%s/docs/newoptions.html#graphical-overview\"> "
2669
"Distribution of %ld Blast Hits on the Query Sequence</a></H3>\n", theInfo->www_root_path == NULL? "/blast" : theInfo->www_root_path, (long)align_num);
2671
PrintAlignmentOverview(seqannot, stdout, "PSI_BLAST", href, f_name, title);
2674
seqannot->data = NULL;
2675
seqannot = SeqAnnotFree(seqannot);
2677
print_data->vnp = convertSeqAlignListToValNodeList(seqalign, lastSeqAligns, numSeqAligns);
2679
print_options += TXALIGN_DO_NOT_PRINT_TITLE;
2680
print_options += TXALIGN_CHECK_BOX;
2681
if (print_data->psidata->StepNumber)
2682
print_options += TXALIGN_NEW_GIF;
2685
printSubmitButton(stdout,
2686
print_data->psidata->StepNumber+1);
2688
if (print_data->psidata->StepNumber && theInfo->number_of_descriptions) {
2689
printf("<HR><p><b>Legend:</b><p>\
2690
<IMG SRC=\"%s/images/new.gif\" WIDTH=25 HEIGHT=15> - means that \
2691
the alignment score was below the threshold on the previous iteration \
2693
<IMG SRC=\"%s/images/checked.gif\" WIDTH=15 HEIGHT=15> - means that \
2694
the alignment was checked on the previous iteration \
2695
</p>", theInfo->www_root_path == NULL? "/blast" : theInfo->www_root_path,
2696
theInfo->www_root_path == NULL? "/blast" : theInfo->www_root_path);
2700
if (theInfo->number_of_descriptions) {
2701
if (print_data->psidata->StepNumber)
2702
printf("\n<IMG SRC=\"/BLAST/bg.gif\" WIDTH=65 HEIGHT=15>");
2704
printf("\nSequences producing significant alignments:");
2705
if (print_data->psidata->StepNumber)
2706
printf("<IMG SRC=\"/BLAST/bg.gif\" WIDTH=65 HEIGHT=15>");
2707
printf(" (bits) Value\n\n");
2712
seqloc = print_data->seqloc;
2715
SplitSeqAlign((SeqAlignPtr) vnp_var->data.ptrvalue, &GoodSeqAlignments, &BadSeqAlignments, &lastGood, &marks,
2716
&countBad, &countGood, ethresh_old);
2718
printf("<HR><CENTER><b><FONT color=\"green\">"
2719
"Sequences with pattern at position %d and E-value BETTER than threshold</FONT></b></CENTER>\n",
2720
SeqLocStart(seqloc)+1);
2722
if (print_data->psidata->StepNumber)
2723
printf("\n<IMG SRC=\"%s/images/bg.gif\" WIDTH=65 HEIGHT=15>", theInfo->www_root_path == NULL? "/blast" : theInfo->www_root_path);
2725
printf("\nSequences producing significant alignments:");
2726
if (print_data->psidata->StepNumber)
2727
printf("<IMG SRC=\"%s/images/bg.gif\" WIDTH=65 HEIGHT=15>",
2728
theInfo->www_root_path == NULL? "/blast" : theInfo->www_root_path);
2729
printf(" (bits) Value\n\n");
2732
print_options += TXALIGN_CHECK_BOX_CHECKED;
2733
PrintDefLinesFromSeqAlignEx(GoodSeqAlignments, 80, stdout, print_options, FIRST_PASS, marks, theInfo->number_of_descriptions);
2735
print_options -= TXALIGN_CHECK_BOX_CHECKED;
2737
if (print_data->psidata->StepNumber == 0)
2738
printf("<a name = Evalue> </a>");
2740
if (theInfo->number_of_descriptions > countGood && BadSeqAlignments) {
2742
printSubmitButton(stdout, print_data->psidata->StepNumber+1);
2744
printf("<HR><CENTER><b><FONT color=\"green\">"
2745
"Sequences with pattern at position %d and E-value WORSE than threshold</FONT></b></CENTER>\n",
2746
SeqLocStart(seqloc)+1);
2748
PrintDefLinesFromSeqAlignEx(BadSeqAlignments, 80, stdout, print_options, FIRST_PASS, &marks[countGood], theInfo->number_of_descriptions - countGood);
2751
marks = MemFree(marks);
2755
lastGood->next = BadSeqAlignments;
2757
vnp_var = vnp_var->next;
2758
seqloc = seqloc->next;
2761
if (theInfo->number_of_descriptions) {
2763
printSubmitButton(stdout,
2764
print_data->psidata->StepNumber+1);
2770
GlobalAlignNumber = 0;
2772
fprintf(stdout, "<HR>");
2775
if (print_data->matrix)
2776
txmatrix = (Int4Ptr PNTR) BlastMatrixToTxMatrix(print_data->matrix);
2778
if (theInfo->number_of_alignments) {
2779
fprintf(stdout, "<CENTER><b><FONT color=\"green\">"
2780
"Alignments</FONT></b></CENTER>\n");
2782
f_order[FEATDEF_REGION] = 1;
2783
g_order[FEATDEF_REGION] = 1;
2784
if(theInfo->align_view == 0) {
2785
ShowTextAlignFromAnnotExtra(theInfo->fake_bsp,
2787
print_data->seqloc, 60,
2789
f_order, g_order, align_options,
2790
txmatrix, print_data->mask_loc,
2793
ShowTextAlignFromAnnotExtra(theInfo->fake_bsp,
2795
print_data->seqloc, 60,
2797
f_order, g_order, align_options,
2798
txmatrix, print_data->mask_loc,
2809
BlastTimeFillStructure(&time_keeper);
2811
fprintf(stdout, "CPU time: %8.2f user secs.\t%8.2f sys. "
2812
"secs\t%8.2f total secs.\n\n",
2813
time_keeper.user, time_keeper.system, time_keeper.total);
2816
txmatrix = (Int4Ptr PNTR) TxMatrixDestruct(txmatrix);
2817
print_data->matrix = BLAST_MatrixDestruct(print_data->matrix);
2820
PrintDbReport(print_data->dbinfo, 70, stdout);
2823
if (print_data->ka_params_gap) {
2824
PrintKAParameters(print_data->ka_params_gap->Lambda,
2825
print_data->ka_params_gap->K,
2826
print_data->ka_params_gap->H,
2831
PGPOutTextMessages(print_data->info_vnp, stdout);
2833
PrintTildeSepLines(print_data->buffer, 70, stdout);
2838
ReadDBBioseqFetchDisable();
2843
Boolean PSIPrintOutput(WWWBlastInfoPtr theInfo,
2844
BLASTPrintDataPtr print_data,
2845
SeqAlignPtr BadSeqAlignments, SeqAlignPtr GoodSeqAlignments,
2846
SeqAlignPtr lastGood,
2847
Int2Ptr marks, Int2 countBad, Int2 countGood,
2848
Nlm_FloatHi ethresh_old)
2850
Uint4 align_options, print_options;
2851
SeqAnnotPtr seqannot;
2852
BlastTimeKeeper time_keeper;
2853
BlastPruneSapStructPtr prune;
2854
Uint1 f_order[FEATDEF_ANY], g_order[FEATDEF_ANY];
2855
Char hostname[30], buffer[32];
2858
Char f_name[64], title[1024];
2861
Int4Ptr PNTR txmatrix;
2863
MemSet((Pointer)(g_order), 0, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
2864
MemSet((Pointer)(f_order), 0, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
2866
if(print_data == NULL) {
2867
WWWBlastErrMessage(BLASTMiscError, NULL);
2874
align_options += TXALIGN_COMPRESS;
2875
align_options += TXALIGN_END_NUM;
2877
if (theInfo->show_gi) {
2878
align_options += TXALIGN_SHOW_GI;
2879
print_options += TXALIGN_SHOW_GI;
2882
if (theInfo->options->gapped_calculation == FALSE)
2883
print_options += TXALIGN_SHOW_NO_OF_SEGS;
2885
if (theInfo->align_view) {
2886
align_options += TXALIGN_MASTER;
2888
if (theInfo->align_view == 1 || theInfo->align_view == 3)
2889
align_options += TXALIGN_MISMATCH;
2891
if (theInfo->align_view == 3 || theInfo->align_view == 4 ||
2892
theInfo->align_view == 6)
2893
align_options += TXALIGN_FLAT_INS;
2895
if (theInfo->align_view == 5 || theInfo->align_view == 6)
2896
align_options += TXALIGN_BLUNT_END;
2898
align_options += TXALIGN_MATRIX_VAL;
2899
align_options += TXALIGN_SHOW_QS;
2902
/* align_options += TXALIGN_MATRIX_VAL;
2903
align_options += TXALIGN_SHOW_QS; */
2905
align_options += TXALIGN_HTML;
2906
print_options += TXALIGN_HTML;
2908
ReadDBBioseqFetchEnable ("psiblast",
2913
seqannot = SeqAnnotNew();
2915
AddAlignInfoToSeqAnnot(seqannot, theInfo->align_type);
2916
seqannot->data = print_data->seqalign;
2922
lastGood->next = BadSeqAlignments;
2924
/* gethostname(hostname, sizeof(hostname)); */
2926
sprintf(href, "%s/nph-viewgif.cgi?",
2927
theInfo->www_root_path == NULL? "/blast" : theInfo->www_root_path);
2929
if (theInfo->show_overview) {
2930
sprintf(f_name, "%ld%ld.gif", (long)random(), (long)getpid());
2931
align_num = get_number_alignment((SeqAlignPtr)(seqannot->data));
2932
sprintf(title, "<H3><a href=\"%s/docs/newoptions.html#graphical-overview\"> "
2933
"Distribution of %ld Blast Hits on the Query Sequence</a></H3>\n", theInfo->www_root_path == NULL? "/blast" : theInfo->www_root_path, (long)align_num);
2935
PrintAlignmentOverview(seqannot, stdout, "PSI_BLAST", href, f_name, title);
2938
/* separate lists */
2940
lastGood->next = NULL;
2942
print_options += TXALIGN_DO_NOT_PRINT_TITLE;
2943
print_options += TXALIGN_CHECK_BOX;
2944
print_options += TXALIGN_CHECK_BOX_CHECKED;
2945
if (print_data->psidata->StepNumber)
2946
print_options += TXALIGN_NEW_GIF;
2949
printSubmitButton(stdout,
2950
print_data->psidata->StepNumber+1);
2952
if (print_data->psidata->StepNumber && theInfo->number_of_descriptions) {
2953
printf("<HR><p><b>Legend:</b><p>\
2954
<IMG SRC=\"%s/images/new.gif\" WIDTH=25 HEIGHT=15> - means that \
2955
the alignment score was below the threshold on the previous iteration \
2957
<IMG SRC=\"%s/images/checked.gif\" WIDTH=15 HEIGHT=15> - means that \
2958
the alignment was checked on the previous iteration \
2959
</p>", theInfo->www_root_path == NULL? "/blast" : theInfo->www_root_path,
2960
theInfo->www_root_path == NULL? "/blast" : theInfo->www_root_path);
2963
if (theInfo->number_of_descriptions) {
2964
printf("<HR><CENTER><b><FONT color=\"green\">"
2965
"Sequences with E-value BETTER than threshold </FONT></b></CENTER>\n");
2966
if (print_data->psidata->StepNumber)
2967
printf("\n<IMG SRC=\"%s/images/bg.gif\" WIDTH=65 HEIGHT=15>", theInfo->www_root_path == NULL? "/blast" : theInfo->www_root_path);
2969
printf("\nSequences producing significant alignments:");
2970
if (print_data->psidata->StepNumber)
2971
printf("<IMG SRC=\"%s/images/bg.gif\" WIDTH=65 HEIGHT=15>", theInfo->www_root_path == NULL? "/blast" : theInfo->www_root_path);
2972
printf(" (bits) Value\n\n");
2975
PrintDefLinesFromSeqAlignEx(GoodSeqAlignments, 80, stdout,
2976
print_options, FIRST_PASS, marks, theInfo->number_of_descriptions);
2978
print_options -= TXALIGN_CHECK_BOX_CHECKED;
2980
if (print_data->psidata->StepNumber == 0)
2981
printf("<a name = Evalue> </a>");
2983
if (theInfo->number_of_descriptions > countGood && BadSeqAlignments) {
2985
printSubmitButton(stdout,
2986
print_data->psidata->StepNumber+1);
2988
printf("<HR><CENTER><b><FONT color=\"green\">"
2989
"Sequences with E-value WORSE than threshold </FONT></b></CENTER>\n");
2991
PrintDefLinesFromSeqAlignEx(BadSeqAlignments, 80, stdout, print_options, FIRST_PASS, &marks[countGood], theInfo->number_of_descriptions - countGood);
2994
if (theInfo->number_of_descriptions) {
2995
printSubmitButton(stdout,
2996
print_data->psidata->StepNumber+1);
3002
GlobalAlignNumber = 0;
3006
lastGood->next = BadSeqAlignments;
3008
prune = BlastPruneHitsFromSeqAlign((SeqAlignPtr) seqannot->data, theInfo->number_of_alignments, NULL);
3009
seqannot->data = prune->sap;
3011
fprintf(stdout, "<HR>");
3014
if (print_data->matrix)
3015
txmatrix = (Int4Ptr PNTR) BlastMatrixToTxMatrix(print_data->matrix);
3017
if (theInfo->number_of_alignments) {
3018
fprintf(stdout, "<CENTER><b><FONT color=\"green\">"
3019
"Alignments</FONT></b></CENTER>\n");
3021
/* New DDV formating requested */
3022
if(theInfo->color_schema != 0) {
3023
if(!DDV_DisplayBlastPairList(prune->sap, print_data->mask_loc,
3025
theInfo->query_is_na, align_options,
3026
theInfo->color_schema)) {
3029
" -------- Failure to print alignment... --------"
3033
} else { /* Old type formating */
3034
if (theInfo->align_view == 0) {
3035
ShowTextAlignFromAnnot2(seqannot, 60, stdout, f_order,
3036
g_order, align_options, txmatrix,
3037
print_data->mask_loc,
3038
FormatScoreFunc, theInfo->database,
3041
ShowTextAlignFromAnnot(seqannot, 60, stdout, f_order,
3042
g_order, align_options, txmatrix,
3043
print_data->mask_loc, NULL);
3049
/* separate lists */
3051
lastGood->next = NULL;
3056
prune = BlastPruneSapStructDestruct(prune);
3058
seqannot->data = NULL;
3059
seqannot = SeqAnnotFree(seqannot);
3063
BlastTimeFillStructure(&time_keeper);
3065
fprintf(stdout, "CPU time: %8.2f user secs.\t%8.2f sys. "
3066
"secs\t%8.2f total secs.\n\n",
3067
time_keeper.user, time_keeper.system, time_keeper.total);
3069
print_data->matrix = BLAST_MatrixDestruct(print_data->matrix);
3071
txmatrix = (Int4Ptr PNTR) TxMatrixDestruct(txmatrix);
3074
PrintDbReport(print_data->dbinfo, 70, stdout);
3076
if (print_data->ka_params) {
3077
PrintKAParameters(print_data->ka_params->Lambda,
3078
print_data->ka_params->K,
3079
print_data->ka_params->H, 70,
3083
if (print_data->ka_params_gap) {
3084
PrintKAParameters(print_data->ka_params_gap->Lambda,
3085
print_data->ka_params_gap->K,
3086
print_data->ka_params_gap->H,
3090
PrintTildeSepLines(print_data->buffer, 70, stdout);
3095
ReadDBBioseqFetchDisable();