1
/* $Id: cddumper.c,v 1.12 2001/11/13 19:52:56 bauer Exp $
2
*===========================================================================
5
* National Center for Biotechnology Information
7
* This software/database is a "United States Government Work" under the
8
* terms of the United States Copyright Act. It was written as part of
9
* the author's official duties as a United States Government employee and
10
* thus cannot be copyrighted. This software/database is freely available
11
* to the public for use. The National Library of Medicine and the U.S.
12
* Government have not placed any restriction on its use or reproduction.
14
* Although all reasonable efforts have been taken to ensure the accuracy
15
* and reliability of the software and data, the NLM and the U.S.
16
* Government do not and cannot warrant the performance or results that
17
* may be obtained by using this software or data. The NLM and the U.S.
18
* Government disclaim all warranties, express or implied, including
19
* warranties of performance, merchantability or fitness for any particular
22
* Please cite the author in any work or product based on this material.
24
* ===========================================================================
26
* File Name: cddumper.c
28
* Author: Aron Marchler-Bauer
30
* Initial Version Creation Date: 10/30/2000
34
* File Description: CD-dumper, rebuilt from scrap parts
38
* --------------------------------------------------------------------------
39
* $Log: cddumper.c,v $
40
* Revision 1.12 2001/11/13 19:52:56 bauer
43
* Revision 1.11 2001/05/23 21:18:06 bauer
44
* added functions for alignment block structure control
46
* Revision 1.10 2001/04/10 20:28:45 bauer
47
* optional flag for not saving out the CD
49
* Revision 1.9 2001/03/07 20:28:38 bauer
50
* CddFree commented out
52
* Revision 1.8 2001/02/28 19:43:01 bauer
53
* resort sequences when reindexing to new master
55
* Revision 1.7 2001/02/16 03:09:30 bauer
56
* added ability to reindex to new master
58
* Revision 1.6 2001/02/13 20:56:21 bauer
59
* added capability to read in DenSeg alignments
61
* Revision 1.5 2001/02/06 22:46:46 bauer
62
* Scoring Matrix as program parameter
64
* Revision 1.4 2001/02/05 22:45:54 bauer
65
* added support for split sets
67
* Revision 1.3 2001/01/24 03:08:08 bauer
70
* Revision 1.2 2001/01/17 21:32:01 bauer
71
* changes to PSSM calculation
73
* Revision 1.1 2001/01/17 20:58:36 bauer
74
* add cddumper, the new version of cddump
78
* ==========================================================================
93
static Args myargs[NUMARGS] = {
94
{"Cd-Accession", /*0*/
95
"RHO", NULL,NULL,FALSE,'c',ARG_STRING, 0.0,0,NULL},
96
{"Extension for ASN.1 output file name", /*1*/
97
"acd", NULL,NULL,FALSE,'a',ARG_STRING, 0.0,0,NULL},
98
{ "Binary output CD", /*2*/
99
"F", NULL,NULL,FALSE,'b',ARG_BOOLEAN,0.0,0,NULL},
100
{ "Store PSSM in CD", /*3*/
101
"F", NULL,NULL,FALSE,'k',ARG_BOOLEAN,0.0,0,NULL},
102
{ "Source Identifier", /*4*/
103
NULL, NULL,NULL,TRUE, 's',ARG_STRING, 0.0,0,NULL},
104
{ "Convert Dense Diag to true multiple Alignment", /*5*/
105
"F", NULL,NULL,FALSE,'m',ARG_BOOLEAN,0.0,0,NULL},
106
{ "File extension for tree file", /*6*/
107
"act", NULL,NULL,FALSE,'t',ARG_STRING, 0.0,0,NULL},
108
{ "Reference file extension", /*7*/
109
"REF", NULL,NULL,FALSE,'r',ARG_STRING, 0.0,0,NULL},
110
{ "Status flag for CDD (1=finished,2=pending,3=oAsIs", /*8*/
111
"2", NULL,NULL,FALSE,'f',ARG_INT, 0.0,0,NULL},
112
{ "Calculate a consensus sequence", /*9*/
113
"F", NULL,NULL,FALSE,'C',ARG_BOOLEAN,0.0,0,NULL},
114
{ "Pseudocount constant (integers 1-10)", /*10*/
115
"-1", NULL,NULL,FALSE,'p',ARG_INT, 0.0,0,NULL},
116
{ "Use this real-ind file to import alignment", /*11*/
117
NULL, NULL,NULL,TRUE, 'i',ARG_STRING, 0.0,0,NULL},
118
{ "Update this existing CD (local file name)", /*12*/
119
NULL, NULL,NULL,TRUE, 'u',ARG_STRING, 0.0,0,NULL},
120
{ "Binary input CD", /*13*/
121
"F", NULL,NULL,FALSE,'e',ARG_BOOLEAN,0.0,0,NULL},
122
{ "New accession for CD in case of update", /*14*/
123
NULL, NULL,NULL,TRUE, 'n',ARG_STRING, 0.0,0,NULL},
124
{ "Parse new name from real-ind file", /*15*/
125
"T", NULL,NULL,FALSE,'N',ARG_BOOLEAN,0.0,0,NULL},
126
{ "Scoring Matrix (PAM30, BLOSUM62, ..)", /*16*/
127
"BLOSUM62",NULL,NULL,FALSE,'M',ARG_STRING, 0.0,0,NULL},
128
{ "Make this PDB-derived sequence the new master", /*17*/
129
NULL, NULL,NULL,TRUE, 'R',ARG_STRING, 0.0,0,NULL},
130
{ "Output PSSMs only", /*18*/
131
"F", NULL,NULL,FALSE,'o',ARG_BOOLEAN,0.0,0,NULL},
132
{ "trim Bioseqs", /*19*/
133
"T", NULL,NULL,FALSE,'T',ARG_BOOLEAN,0.0,0,NULL},
134
{ "trim SeqAligns if not an oAsIs CD", /*20*/
135
"T", NULL,NULL,FALSE,'A',ARG_BOOLEAN,0.0,0,NULL},
136
{ "retrieve superposition data from PubVast", /*21*/
137
"T", NULL,NULL,FALSE,'v',ARG_BOOLEAN,0.0,0,NULL},
138
{ "Parse short name from description, move to name field",/*22*/
139
"F", NULL,NULL,TRUE,'P', ARG_BOOLEAN,0.0,0,NULL}
142
/*---------------------------------------------------------------------------*/
143
/*---------------------------------------------------------------------------*/
144
/* read parameters from configuration file */
145
/*---------------------------------------------------------------------------*/
146
/*---------------------------------------------------------------------------*/
147
static Boolean CddGetParams()
149
URLBase[0] = URLcgi[0] = ENTREZurl[0] = DOCSUMurl[0] = MAILto[0] = '\0';
150
MMDBpath[0] = gunzip[0] = '\0';
152
GetAppParam("cdd", "CDDSRV", "URLBase", "", URLBase, PATH_MAX);
153
if (URLBase[0] == '\0') {
154
ErrPostEx(SEV_FATAL,0,0,"CDD config file has no URLBase...\n");
157
GetAppParam("cdd", "CDDSRV", "URLcgi", "", URLcgi, PATH_MAX);
158
if (URLcgi[0] == '\0') {
159
ErrPostEx(SEV_FATAL,0,0,"CDD config file has no URLcgi...\n");
162
GetAppParam("cdd", "CDDSRV", "ENTREZurl", "", ENTREZurl, PATH_MAX);
163
if (ENTREZurl[0] == '\0') {
164
ErrPostEx(SEV_FATAL,0,0,"CDD config file has no ENTREZurl...\n");
167
GetAppParam("cdd", "CDDSRV", "DOCSUMurl", "", DOCSUMurl, PATH_MAX);
168
if (DOCSUMurl[0] == '\0') {
169
ErrPostEx(SEV_FATAL,0,0,"CDD config file has no DOCSUMurl...\n");
172
GetAppParam("cdd", "CDDSRV", "Gunzip", "", gunzip, (size_t) 256*(sizeof(Char)));
173
if (gunzip[0] == '\0') {
174
ErrPostEx(SEV_FATAL,0,0,"CDD config file has no Gunzip...\n");
177
GetAppParam("cdd", "CDDSRV", "Database", "", MMDBpath, PATH_MAX);
178
if (MMDBpath[0] == '\0') {
179
ErrPostEx(SEV_FATAL,0,0,"MMDB config file\nMMDBSRV section has no Database...\n");
182
GetAppParam("cdd", "CDDSRV", "MAILto", "", MAILto, PATH_MAX);
183
if (MAILto[0] == '\0') {
184
ErrPostEx(SEV_FATAL,0,0,"CDD config file has no MAILto...\n");
187
GetAppParam("cdd", "CDDSRV", "DATApath", "", DATApath, PATH_MAX);
188
if (DATApath[0] == '\0') {
189
ErrPostEx(SEV_FATAL,0,0,"CDD config file has no VAST Data path...\n");
192
GetAppParam("cdd", "CDDSRV", "CDDpath", "", CDDpath, PATH_MAX);
193
if (CDDpath[0] == '\0') {
194
ErrPostEx(SEV_FATAL,0,0,"CDD config file has no VAST html path...\n");
197
GetAppParam("cdd", "CDDSRV", "CDDatabase", "", CDDdpath, PATH_MAX);
198
if (CDDdpath[0] == '\0') {
199
ErrPostEx(SEV_FATAL,0,0,"CDD config file has no CDD data path...\n");
202
GetAppParam("cdd", "CDDSRV", "CVDatabase", "", CDDvpath, PATH_MAX);
203
if (CDDvpath[0] == '\0') {
204
ErrPostEx(SEV_FATAL,0,0,"CDD config file has no CDD/VAST data path...\n");
207
GetAppParam("cdd", "CDDSRV", "CDDextens", "", CDDextens, PATH_MAX);
208
if (CDDextens[0] == '\0') {
209
ErrPostEx(SEV_FATAL,0,0,"CDD config file has no CDD file name extension...\n");
212
GetAppParam("cdd", "CDDSRV", "RAWextens", "", RAWextens, PATH_MAX);
213
if (RAWextens[0] == '\0') {
214
ErrPostEx(SEV_FATAL,0,0,"CDD config file has no RAW file name extension...\n");
217
GetAppParam("cdd", "CDDSRV", "CDDdescr", "", CDDdescr, PATH_MAX);
218
if (CDDdescr[0] == '\0') {
219
ErrPostEx(SEV_FATAL,0,0,"CDD config file has no description file name extension...\n");
222
GetAppParam("cdd", "CDDSRV", "Database", "", database, PATH_MAX);
223
if (database[0] == '\0') {
224
ErrPostEx(SEV_FATAL,0,0,"CDD config file has no VAST data path...\n");
227
GetAppParam("cdd", "CDDSRV", "REFpath", "", REFpath, PATH_MAX);
228
if (database[0] == '\0') {
229
ErrPostEx(SEV_FATAL,0,0,"CDD config file has no REFpath...\n");
233
} /* end GetVastParams */
236
/*---------------------------------------------------------------------------*/
237
/*---------------------------------------------------------------------------*/
238
/* Read in Sequence Alignment Data formatted as ASN.1 */
239
/*---------------------------------------------------------------------------*/
240
/*---------------------------------------------------------------------------*/
241
SeqAlignPtr CddReadSeqAln() {
243
SeqAnnotPtr psaCAlignHead = NULL;
245
Char CDDalign[PATH_MAX];
248
strcpy(CDDalign,CDDdpath);
249
strcat(CDDalign,cCDDid);
250
strcat(CDDalign,CDDextens);
251
aip = AsnIoOpen(CDDalign,"r");
252
psaCAlignHead = SeqAnnotAsnRead(aip, NULL);
254
if (!psaCAlignHead) CddSevError("Could not access CDD alignment!");
255
salp = psaCAlignHead->data;
256
if (salp->segtype == SAS_DENDIAG) {
258
} else if (salp->segtype == SAS_DENSEG) {
259
return (CddMSLDenSegToMSLDenDiag(salp));
264
/*---------------------------------------------------------------------------*/
265
/*---------------------------------------------------------------------------*/
266
/* Check whether a particular gi has occured previously in the CddSum lnklst */
267
/*---------------------------------------------------------------------------*/
268
/*---------------------------------------------------------------------------*/
269
static Boolean UniqueUid(Int4 uid, Boolean bIsPdb, CddSumPtr pcds) {
273
if (!pcds) return TRUE;
276
if (pcdsThis->uid == uid && pcdsThis->bIsPdb == bIsPdb) return FALSE;
277
pcdsThis = pcdsThis->next;
282
/*---------------------------------------------------------------------------*/
283
/*---------------------------------------------------------------------------*/
284
/* Stolen from mmdbsrv.c - converts PDB-Id to numerical mmdb-id */
285
/* modified - does no longer check whether the string corresponds to an */
286
/* integer in the first place, so that PDB-Id's like "1914" can be read as */
288
/*---------------------------------------------------------------------------*/
289
/*---------------------------------------------------------------------------*/
290
static Int4 ConvertMMDBUID(CharPtr pcString)
293
CharPtr pcTemp = NULL;
295
if (pcString == NULL) return 0;
297
pcTemp = StringSave(pcString);
299
iUID = MMDBEvalPDB(pcTemp);
304
/*---------------------------------------------------------------------------*/
305
/*---------------------------------------------------------------------------*/
306
/* allocate a new CddSum linked list entry */
307
/*---------------------------------------------------------------------------*/
308
/*---------------------------------------------------------------------------*/
309
static CddSumPtr CddSumNew()
313
pcds=(CddSumPtr)MemNew(sizeof(CddSum));
314
if (pcds==NULL) return(pcds);
315
pcds->bIsPdb = FALSE;
316
pcds->bIsMaster = FALSE;
317
pcds->cPdbId[0] = '\0';
318
pcds->cChainId[0] = '\0';
319
pcds->cPKBMDom[0] = '\0';
320
pcds->cPKBDom[0] = '\0';
321
pcds->cDefLine[0] = '\0';
332
/*---------------------------------------------------------------------------*/
333
/*---------------------------------------------------------------------------*/
334
/* Free a CddSum linked list */
335
/*---------------------------------------------------------------------------*/
336
/*---------------------------------------------------------------------------*/
337
static CddSumPtr CddSumFree(CddSumPtr pcds)
349
/*---------------------------------------------------------------------------*/
350
/*---------------------------------------------------------------------------*/
351
/* adds a to a linked list of CddSumPtr, always returns the beginning of the */
352
/* list and always adds to the end of the list!! */
353
/*---------------------------------------------------------------------------*/
354
/*---------------------------------------------------------------------------*/
355
static CddSumPtr CddSumLink(CddSumPtr PNTR head, CddSumPtr newnode)
359
if (head == NULL) return newnode;
362
while(pcds->next != NULL) pcds = pcds->next;
363
pcds->next = newnode;
364
} else *head = newnode;
369
/*---------------------------------------------------------------------------*/
370
/*---------------------------------------------------------------------------*/
371
/* Process Sequence Alignment */
372
/*---------------------------------------------------------------------------*/
373
/*---------------------------------------------------------------------------*/
374
CddSumPtr CddProcessSeqAln(SeqAlignPtr* salpInOut, Int4* n_Pdb, BiostrucAlignSeqPtr pbsaSeq,
378
CddSumPtr pcdsThis, pcds = NULL;
382
PDBSeqIdPtr pdb_seq_id;
383
SeqAlignPtr salpCopy, salpTail, salpHead;
384
SeqAnnotPtr annot, thisAnnot;
385
SeqEntryPtr sep, sepNew;
386
SeqIdPtr sip, sip_master = NULL, sipXtra;
388
Int4 iPcount, uid, uidmaster = 0;
392
salpHead = *salpInOut;
393
salpCopy = NULL; salpTail = NULL;
397
ddp = (DenseDiagPtr) salpHead->segs;
400
uid = ID1FindSeqId(sip);
402
pcdsThis = CddSumNew();
404
pcdsThis->iCddIdx = iCddSize;
406
if (sip->choice == 15) {
407
pcdsThis->bIsPdb = TRUE;
408
pdb_seq_id = (PDBSeqIdPtr) CddGetPdbSeqId(sip);
409
pcdsThis->cChainId[0] = pdb_seq_id->chain;
410
pcdsThis->cChainId[1] = '\0';
411
pcdsThis->cPdbId[0] = pdb_seq_id->mol[0];
412
pcdsThis->cPdbId[1] = pdb_seq_id->mol[1];
413
pcdsThis->cPdbId[2] = pdb_seq_id->mol[2];
414
pcdsThis->cPdbId[3] = pdb_seq_id->mol[3];
415
pcdsThis->cPdbId[4] = '\0';
416
pcdsThis->iMMDBId = ConvertMMDBUID(pcdsThis->cPdbId);
417
} else pcdsThis->bIsPdb = FALSE;
418
if (!uidmaster && iPcount == 1) {
419
if (!sip_master) sip_master = sip;
421
pcdsThis->bIsMaster = TRUE;
422
if (pcdsThis->bIsPdb) nPdb++;
424
if (UniqueUid(uid,pcdsThis->bIsPdb,pcds)) {
425
sep = (SeqEntryPtr) ID1SeqEntryGet(uid,retcode);
426
if (sep == NULL) CddSevError("Unable to get MasterSeqEntry from Entrez");
427
bspNew = (BioseqPtr) CddExtractBioseq(sep,sip);
428
if (bTrimBioseq) CddShrinkBioseq(bspNew);
429
/*---------------------------------------------------------------------------*/
430
/* Add Additional SequenceIds in case this is a 3D structure */
431
/*---------------------------------------------------------------------------*/
432
if (pcdsThis->bIsPdb) {
433
sipXtra = ValNodeNew(NULL);
434
sipXtra->data.intvalue = pcdsThis->uid;
435
sipXtra->choice = SEQID_GI;
436
bspNew->id->next = sipXtra;
437
sipXtra = ValNodeNew(NULL);
438
sipXtra->choice = SEQID_GENERAL;
439
dbtp = (DbtagPtr) DbtagNew();
440
dbtp->db = (CharPtr) StringSave("mmdb");
442
oidp->id = pcdsThis->iMMDBId;
444
sipXtra->data.ptrvalue = dbtp;
445
annot = SeqAnnotNew();
446
annot->type = (Uint1) 4;
447
annot->data = sipXtra;
448
if (!bspNew->annot) {
449
bspNew->annot = annot;
451
thisAnnot = bspNew->annot;
452
while (thisAnnot->next) thisAnnot = thisAnnot->next;
453
thisAnnot->next = annot;
456
sepNew = SeqEntryNew();
457
sepNew->data.ptrvalue = bspNew;
459
ValNodeLink(&(pbsaSeq->sequences), sepNew);
462
CddSumLink(&(pcds),pcdsThis);
463
} else if (uid == uidmaster && iPcount == 1) {
466
CddSumFree(pcdsThis);
468
sep = (SeqEntryPtr) ID1SeqEntryGet(uid,retcode);
470
printf("Unable to get SeqEntry %d from ID1, skipping ..\n",uid);
472
if (pcdsThis->bIsPdb) nPdb++;
478
salpTail->next = salpHead;
479
salpTail = salpTail->next;
481
if (UniqueUid(uid,pcdsThis->bIsPdb,pcds)) {
482
bspNew = (BioseqPtr) CddExtractBioseq(sep,sip);
483
if (bTrimBioseq) CddShrinkBioseq(bspNew);
484
/*---------------------------------------------------------------------------*/
485
/* Add Additional SequenceIds in case this is a 3D structure */
486
/*---------------------------------------------------------------------------*/
487
if (pcdsThis->bIsPdb) {
488
sipXtra = ValNodeNew(NULL);
489
sipXtra->data.intvalue = pcdsThis->uid;
490
sipXtra->choice = SEQID_GI;
491
bspNew->id->next = sipXtra;
492
sipXtra = ValNodeNew(NULL);
493
sipXtra->choice = SEQID_GENERAL;
494
dbtp = (DbtagPtr) DbtagNew();
495
dbtp->db = (CharPtr) StringSave("mmdb");
497
oidp->id = pcdsThis->iMMDBId;
499
sipXtra->data.ptrvalue = dbtp;
500
annot = SeqAnnotNew();
501
annot->type = (Uint1) 4;
502
annot->data = sipXtra;
503
if (!bspNew->annot) {
504
bspNew->annot = annot;
506
thisAnnot = bspNew->annot;
507
while (thisAnnot->next) thisAnnot = thisAnnot->next;
508
thisAnnot->next = annot;
510
/* bspNew->id->next->next = sipXtra; */
512
/* bspNew = BioseqLockById(sip); */
513
sepNew = SeqEntryNew();
514
sepNew->data.ptrvalue = bspNew;
516
ValNodeLink(&(pbsaSeq->sequences), sepNew);
521
CddSumLink(&(pcds),pcdsThis);
525
printf("Warning: %s - could not find uid #%d[%d] in ID1, removed from Cdd\n",
526
cCDDid,iCddSize,sip->data.intvalue);
531
salpHead = salpHead->next;
533
salpTail->next = NULL;
535
*salpInOut = salpCopy;
539
/*---------------------------------------------------------------------------*/
540
/*---------------------------------------------------------------------------*/
541
/* move alignments to PDB-derived sequences up in a list */
542
/*---------------------------------------------------------------------------*/
543
/*---------------------------------------------------------------------------*/
544
static SeqAlignPtr CddAlignSort(SeqAlignPtr salp, CddSumPtr pcds)
550
SeqAlignPtr salpThis;
551
SeqAlignPtr salpStruct = NULL;
552
SeqAlignPtr salpStructHead = NULL;
553
SeqAlignPtr salpSeq = NULL;
554
SeqAlignPtr salpSeqHead = NULL;
559
ddp = (DenseDiagPtr) salpThis->segs;
566
if (pcdsThis->bIsPdb && CddSameSip(pcdsThis->sip,sip)) {
569
pcdsThis = pcdsThis->next;
571
if (bCont && sip->choice == 15) {
573
salpStruct = salpThis; salpStructHead = salpThis;
575
salpStruct->next = salpThis;
576
salpStruct = salpStruct->next;
580
salpSeq = salpThis; salpSeqHead = salpThis;
582
salpSeq->next = salpThis;
583
salpSeq = salpSeq->next;
586
salpThis = salpThis->next;
589
if (!salpStructHead) return(salpSeqHead);
590
salpStruct->next = salpSeqHead;
591
salpSeq->next = NULL;
593
salpStruct->next = NULL;
595
return(salpStructHead);
598
/*---------------------------------------------------------------------------*/
599
/*---------------------------------------------------------------------------*/
600
/* move all pdb-derived sequences up in the list */
601
/*---------------------------------------------------------------------------*/
602
/*---------------------------------------------------------------------------*/
603
static CddSumPtr CddSumSort(CddSumPtr pcds)
606
CddSumPtr pcdsStruct = NULL;
607
CddSumPtr pcdsStructHead = NULL;
608
CddSumPtr pcdsSeq = NULL;
609
CddSumPtr pcdsSeqHead = NULL;
613
if (pcdsThis->bIsPdb) {
615
pcdsStruct = pcdsThis; pcdsStructHead = pcdsThis;
617
pcdsStruct->next = pcdsThis;
618
pcdsStruct = pcdsStruct->next;
622
pcdsSeq = pcdsThis; pcdsSeqHead = pcdsThis;
624
pcdsSeq->next = pcdsThis;
625
pcdsSeq = pcdsSeq->next;
628
pcdsThis = pcdsThis->next;
631
pcdsStruct->next = pcdsSeqHead;
632
pcdsSeq->next = NULL;
634
pcdsStruct->next = NULL;
636
return(pcdsStructHead);
639
/*---------------------------------------------------------------------------*/
640
/*---------------------------------------------------------------------------*/
641
/* Read information about which VAST data have to be retrieved */
642
/*---------------------------------------------------------------------------*/
643
/*---------------------------------------------------------------------------*/
644
static void CddReadVASTInfo(CddSumPtr pcds)
656
strcpy(path,CDDvpath);
658
strcat(path,".VASTinfo");
659
if (!(fp=FileOpen(path, "r"))) return;
662
pcTest = fgets(pcBuf, (size_t)100,fp);
664
strcpy(cMaster,strtok(pcTest,"\t")); cMaster[6] = '\0';
665
strcpy(cSlave,strtok(NULL,"\t")); cSlave[6] = '\0';
666
strcpy(cPKBMDom,strtok(NULL,"\t")); cPKBMDom[6] = '\0';
667
strncpy(cPKBDom,strtok(NULL,"\t"),8); cPKBDom[8] = '\0';
670
if (pcdsThis->bIsPdb && !(pcdsThis->bIsMaster)) {
671
if (strncmp(cSlave,pcdsThis->cPdbId,4)==0) {
672
if (pcdsThis->cChainId[0]==cSlave[5]) {
673
strcpy(pcdsThis->cPKBMDom,cPKBMDom);
674
strcpy(pcdsThis->cPKBDom,cPKBDom);
678
pcdsThis = pcdsThis->next;
685
/*---------------------------------------------------------------------------*/
686
/*---------------------------------------------------------------------------*/
687
/* checks for general overlap (i.e. two intervals have at least one position */
689
/*---------------------------------------------------------------------------*/
690
/*---------------------------------------------------------------------------*/
691
Boolean OverlapInterval(Int4 from1, Int4 to1, Int4 from2, Int4 to2)
693
if (from1 <= to2 && to1 >= from2) return (TRUE);
697
/*----------------------------------------------------------------------------*/
698
/* The callback routine - for sorting aligned blocks from the real.ind file */
699
/*----------------------------------------------------------------------------*/
700
static int LIBCALLBACK CompareBlocks(VoidPtr vp1, VoidPtr vp2)
702
RealIndPtr pri1 = NULL;
703
RealIndPtr pri2 = NULL;
704
ValNodePtr vnp1, vnp2;
705
ValNodePtr PNTR vnpp1;
706
ValNodePtr PNTR vnpp2;
710
vnpp1 = (ValNodePtr PNTR) vp1;
711
vnpp2 = (ValNodePtr PNTR) vp2;
714
pri1 = (RealIndPtr) vnp1->data.ptrvalue;
715
pri2 = (RealIndPtr) vnp2->data.ptrvalue;
717
if (pri1 == NULL && pri2 != NULL) return bigr;
718
if (pri1 != NULL && pri2 == NULL) return smlr;
719
if (pri1 == NULL && pri2 == NULL) return 0;
721
if (pri1->aid > pri2->aid) return bigr;
722
else if (pri1->aid < pri2->aid) return smlr;
724
if (pri1->mstart > pri2->mstart) return bigr;
725
else if (pri1->mstart < pri2->mstart) return smlr;
727
if (pri1->mstop > pri2->mstop) return bigr;
728
else if (pri1->mstop < pri2->mstop) return smlr;
730
if (pri1->sstart > pri2->sstart) return bigr;
731
else if (pri1->sstart < pri2->sstart) return smlr;
733
if (pri1->sstop > pri2->sstop) return bigr;
734
else if (pri1->sstop < pri2->sstop) return smlr;
739
/*---------------------------------------------------------------------------*/
740
/*---------------------------------------------------------------------------*/
741
/* read in an alignment formatted in the "real.ind" style, i.e. in a white- */
742
/* space delimited 6 column format: ID of master, ID of slave, start/stop */
743
/* pairs in master and slave. IDs are gi's, and contain PDB-Ids if structure */
744
/* derived which are separated from the gi's with a vertical bar */
745
/*---------------------------------------------------------------------------*/
746
/*---------------------------------------------------------------------------*/
747
SeqAlignPtr CddReadRealInd(CharPtr name)
750
SeqAlignPtr salp, salpHead = NULL, salpTail = NULL;
751
DenseDiagPtr ddp, ddpTail;
752
RealIndPtr priThat, priThis, priHead = NULL, priTail = NULL, priTmp;
753
PDBSeqIdPtr pdb_seq_id;
755
Char mid[15], sid[15];
758
Int4 mstart, mstop, sstart, sstop;
761
ValNodePtr vnp = NULL;
763
if ((infile = FileOpen(name,"r"))==NULL)
764
CddSevError("Could not open real.ind-file!");
765
/*---------------------------------------------------------------------------*/
766
/* scan in file and fill the RealInd data structures */
767
/*---------------------------------------------------------------------------*/
768
while (fscanf(infile,"%s %s %d %d %d %d\n",mid,sid,&mstart,&mstop,&sstart,&sstop)!=EOF) {
769
priThis = (RealIndPtr) MemNew(sizeof(RealInd));
770
priThis->mstart = mstart;
771
priThis->mstop = mstop;
772
priThis->sstart = sstart;
773
priThis->sstop = sstop;
775
nxtid = strstr(mid,"|");
778
pdb_seq_id = (PDBSeqIdPtr) PDBSeqIdNew();
779
if (strlen(pdbid) > 4) pdb_seq_id->chain = (Uint1) *(pdbid+4);
781
pdb_seq_id->mol = strdup(pdbid);
783
priThis->mgi = atoi(mid);
784
sip = ValNodeNew(NULL);
785
sip->choice = SEQID_PDB;
786
sip->data.ptrvalue = pdb_seq_id;
788
priThis->mIsPdb = TRUE;
790
priThis->mgi = atoi(mid);
791
sip = ValNodeNew(NULL);
792
sip->choice = SEQID_GI;
793
sip->data.intvalue = priThis->mgi;
795
priThis->mIsPdb = FALSE;
797
nxtid = strstr(sid,"|");
800
pdb_seq_id = (PDBSeqIdPtr) PDBSeqIdNew();
801
if (strlen(pdbid) > 4) pdb_seq_id->chain = (Uint1) *(pdbid+4);
803
pdb_seq_id->mol = strdup(pdbid);
805
priThis->sgi = atoi(sid);
806
sip = ValNodeNew(NULL);
807
sip->choice = SEQID_PDB;
808
sip->data.ptrvalue = pdb_seq_id;
810
priThis->sIsPdb = TRUE;
812
priThis->sgi = atoi(sid);
813
sip = ValNodeNew(NULL);
814
sip->choice = SEQID_GI;
815
sip->data.intvalue = priThis->sgi;
817
priThis->sIsPdb = FALSE;
820
priTail->next = priThis;
822
priThis->next = NULL;
826
priThis->next = NULL;
835
conflict = MemNew((lastid+1)*sizeof(Int4));
836
for (i=0;i<=lastid;i++) conflict[i] = 0;
838
while (priThat != priThis) {
839
if (priThat->mgi != priThis->mgi)
840
CddSevError("Inconsistent master-id's in real.ind file!");
841
if (priThat->sgi != priThis->sgi) {
842
conflict[priThat->aid] = 1;
844
if (OverlapInterval(priThis->mstart,priThis->mstop,priThat->mstart,priThat->mstop))
845
conflict[priThat->aid] = 1;
846
if (OverlapInterval(priThis->sstart,priThis->sstop,priThat->sstart,priThat->sstop))
847
conflict[priThat->aid] = 1;
848
if (priThis->mstart < priThat->mstart &&
849
priThis->sstart > priThat->sstart)
850
conflict[priThat->aid] = 1;
851
if (priThis->mstart > priThat->mstart &&
852
priThis->sstart < priThat->sstart)
853
conflict[priThat->aid] = 1;
855
priThat = priThat->next;
858
for (i=0;i<=lastid;i++) {
859
if (conflict[i] == 0) {
864
if (priThis->aid == -1) priThis->aid = ++lastid;
867
priThis = priThis->next;
871
ValNodeAddPointer(&vnp,0,priThis);
872
priThis = priThis->next;
874
VnpHeapSort(&vnp,CompareBlocks);
878
priThis = vnp->data.ptrvalue;
880
priTail->next = priThis;
882
priThis->next = NULL;
886
priThis->next = NULL;
890
priThis = priHead; priThat = NULL;
892
if (priThis == priHead || (priThat && priThis->aid != priThat->aid)) {
893
salp = (SeqAlignPtr) SeqAlignNew();
894
salp->type = (Uint1) SAT_PARTIAL;
895
salp->segtype = (Uint1) SAS_DENDIAG;
901
salpTail->next = NULL;
903
salpTail->next = salp;
905
salpTail->next = NULL;
908
ddp = DenseDiagNew();
911
ddp->id = priThis->msip;
912
ddp->id->next = priThis->ssip;
913
ddp->len = priThis->mstop - priThis->mstart + 1;
914
ddp->starts = MemNew(2*sizeof(Int4));
915
ddp->starts[0] = priThis->mstart-1;
916
ddp->starts[1] = priThis->sstart-1;
925
priThis = priThis->next;
932
/*---------------------------------------------------------------------------*/
933
/*---------------------------------------------------------------------------*/
934
/* From the VAST info get the feature set id's required for slaves */
935
/*---------------------------------------------------------------------------*/
936
/*---------------------------------------------------------------------------*/
937
static void CddDetermineFsids(CddSumPtr pcds, PDNMS pModelStruc)
946
Int4 ichn, idom, domcnt;
947
Int4 chndom0, chndom;
951
pmsd = (PMSD) pModelStruc->data.ptrvalue;
952
uid = (Int4) pmsd->iMMDBid;
954
for (pdnmm=pmsd->pdnmmHead,cnt=ichn=0;pdnmm!=NULL;pdnmm=pdnmm->next) {
955
pmmd = (PMMD) pdnmm->data.ptrvalue;
956
/*---------------------------------------------------------------------------*/
957
/* getting the chain id from pmmd seems to be a safer way than to increment */
958
/*---------------------------------------------------------------------------*/
959
ichn = pmmd->iChainId;
960
if ((pmmd->bWhat) & AM_PROT) {
961
if ((pmmd->iResCount <= 1) || (pmmd->iGi <= 0)) continue;
962
chndom0 = 10000 * uid + 100 * (Int4) ichn;
965
if (pcdsThis->bIsPdb && !pcdsThis->bIsMaster) {
966
if (pcdsThis->cPKBMDom[4]==pmmd->pcMolName[0]) {
967
if (pcdsThis->cPKBMDom[5]=='0') pcdsThis->iFsid = chndom0;
970
pcdsThis = pcdsThis->next;
972
idom = 0; domcnt = 0;
973
for (pdnmg=pmmd->pdnmgHead; pdnmg != NULL; pdnmg=pdnmg->next) {
974
pmgd = pdnmg->data.ptrvalue;
975
if (pmgd->iDomain > idom) {
976
idom = (Int4) pmgd->iDomain;
977
chndom = chndom0+idom;
981
if (pcdsThis->bIsPdb && !pcdsThis->bIsMaster) {
982
if (pcdsThis->cPKBMDom[4]==pmmd->pcMolName[0]) {
983
strcpy(cDid,&(pcdsThis->cPKBMDom[5]));
984
if (atoi(cDid)==domcnt) pcdsThis->iFsid = chndom;
987
pcdsThis = pcdsThis->next;
995
/*---------------------------------------------------------------------------*/
996
/*---------------------------------------------------------------------------*/
997
/* get the Feature id's identifying the correct slaves in the VAST data */
998
/*---------------------------------------------------------------------------*/
999
/*---------------------------------------------------------------------------*/
1000
static void CddDetermineFids(CddSumPtr pcds, PDNMS pModelStruc, CharPtr szName)
1009
Int4 ichn, idom, domcnt;
1010
Int4 chndom0, chndom;
1014
pmsd = (PMSD) pModelStruc->data.ptrvalue;
1015
uid = (Int4) pmsd->iMMDBid;
1017
for (pdnmm=pmsd->pdnmmHead,cnt=ichn=0;pdnmm!=NULL;pdnmm=pdnmm->next) {
1018
pmmd = (PMMD) pdnmm->data.ptrvalue;
1019
ichn = pmmd->iChainId;
1020
if ((pmmd->bWhat) & AM_PROT) {
1021
if ((pmmd->iResCount <= 1) || (pmmd->iGi <= 0)) continue;
1022
chndom0 = 100000 * uid + 1000 * (Int4) ichn;
1025
if (pcdsThis->bIsPdb && !pcdsThis->bIsMaster) {
1026
if (strncmp(pcdsThis->cPdbId,szName,4)==0) {
1027
if (pcdsThis->cPKBDom[5]==pmmd->pcMolName[0]) {
1028
if (pcdsThis->cPKBDom[7]==' ') {
1029
chndom = chndom0 + 1;
1030
if (pcdsThis->iFid == -1) pcdsThis->iFid = chndom;
1035
pcdsThis = pcdsThis->next;
1037
idom = 0; domcnt = 0;
1038
for (pdnmg=pmmd->pdnmgHead; pdnmg != NULL; pdnmg=pdnmg->next) {
1039
pmgd = pdnmg->data.ptrvalue;
1040
if (pmgd->iDomain > idom) {
1041
idom = (Int4) pmgd->iDomain;
1042
chndom = chndom0 + idom * 10;
1046
if (pcdsThis->bIsPdb && !pcdsThis->bIsMaster) {
1047
if (strncmp(pcdsThis->cPdbId,szName,4)==0) {
1048
if (pcdsThis->cPKBDom[5]==pmmd->pcMolName[0]) {
1049
strcpy(cDid,&(pcdsThis->cPKBDom[7]));
1050
chndom = chndom + 1;
1051
if (atoi(cDid)==domcnt) {
1052
if (pcdsThis->iFid == -1) pcdsThis->iFid = chndom;
1057
pcdsThis = pcdsThis->next;
1065
/*---------------------------------------------------------------------------*/
1066
/*---------------------------------------------------------------------------*/
1067
/* Identify Master Feature Set Id's that go with each aligned slave. */
1068
/*---------------------------------------------------------------------------*/
1069
/*---------------------------------------------------------------------------*/
1070
static void CddIdentifyFsids(CddSumPtr pcds)
1073
Char chain[2], szName[5];
1074
BiostrucPtr pbsXtra, pbsTemp;
1075
Int4 iModelComplexity = ONECOORDRES;
1079
iMMDBid = pcds->iMMDBId;
1080
strcpy(szName,pcds->cPdbId);
1081
pbsXtra = (BiostrucPtr) MMDBBiostrucGet(ConvertMMDBUID(szName),iModelComplexity,1);
1082
strcpy(chain,pcds->cChainId);
1083
if (chain[0] != ' ') {
1084
pbsTemp = (BiostrucPtr)PruneBiostruc(pbsXtra,chain);
1088
pModelStruc = MakeAModelstruc(pbsXtra);
1089
CddDetermineFsids(pcds, pModelStruc);
1091
pcdsThis = pcds->next;
1093
if (pcdsThis->bIsPdb) {
1094
strcpy(szName,pcdsThis->cPdbId);
1095
pbsXtra = (BiostrucPtr) MMDBBiostrucGet(ConvertMMDBUID(szName),iModelComplexity,1);
1096
strcpy(chain,pcdsThis->cChainId);
1097
if (chain[0] != ' ') {
1098
pbsTemp = (BiostrucPtr)PruneBiostruc(pbsXtra,chain);
1102
iMMDBid = pcdsThis->iMMDBId;
1103
pModelStruc = MakeAModelstruc(pbsXtra);
1104
if (pModelStruc) CddDetermineFids(pcds,pModelStruc,szName);
1107
pcdsThis = pcdsThis->next;
1111
/*---------------------------------------------------------------------------*/
1112
/*---------------------------------------------------------------------------*/
1113
/* stolen from vastlocl.c */
1114
/*---------------------------------------------------------------------------*/
1115
/*---------------------------------------------------------------------------*/
1116
static BiostrucAnnotSetPtr CddVASTBsAnnotSetGet (Int4 uid)
1118
AsnIoPtr aip = NULL;
1119
AsnTypePtr atp = NULL;
1120
Char path[PATH_MAX];
1121
Char compath[PATH_MAX];
1122
Char tempfile[PATH_MAX];
1124
Int2 iFileExists = 0;
1125
BiostrucAnnotSetPtr pbsa = NULL;
1129
sprintf(pcId, "%ld", (long) uid);
1131
StringCpy(path, database);
1132
StringCat(path, pcId);
1133
StringCat(path, ".bas");
1135
#ifdef MMDB_UNIXCOMPRESSED
1137
sprintf(compath, "%s -c %s.gz ", gunzip, path);
1138
pipe = popen(compath, "rb");
1140
ErrPostEx(SEV_FATAL,0,0, "VASTBsAnnotSetGet failed: Can't find gunzip in path.\n");
1143
aip = AsnIoNew(ASNIO_BIN_IN, pipe, NULL, NULL, NULL);
1145
iFileExists = FileLength(path);
1146
if (iFileExists == 0) {
1149
aip = AsnIoOpen(path, "rb");
1152
pbsa = BiostrucAnnotSetAsnRead(aip, NULL);
1155
#ifdef MMDB_UNIXCOMPRESSED
1158
if (!pbsa) return NULL;
1162
/*---------------------------------------------------------------------------*/
1163
/*---------------------------------------------------------------------------*/
1164
/* stolen from vastlocl.c */
1165
/*---------------------------------------------------------------------------*/
1166
/*---------------------------------------------------------------------------*/
1167
static Boolean CddIsVASTData(Int4 uid)
1169
AsnIoPtr aip = NULL;
1170
AsnTypePtr atp = NULL;
1171
Char path[PATH_MAX];
1174
sprintf(pcId, "%ld", (long) uid);
1176
StringCpy(path, database);
1177
StringCat(path, pcId);
1178
StringCat(path, ".bas");
1180
#ifdef MMDB_UNIXCOMPRESSED
1181
StringCat(path, ".gz");
1182
if (FileLength(path) != 0) return TRUE;
1184
if (FileLength(path) != 0) return TRUE;
1189
/*---------------------------------------------------------------------------*/
1190
/*---------------------------------------------------------------------------*/
1191
/* stolen from vastsrv.c */
1192
/*---------------------------------------------------------------------------*/
1193
/*---------------------------------------------------------------------------*/
1194
static BiostrucAnnotSetPtr CddLocalGetFeatureSet(Int4 mmdbid, Int4 feature_set_id)
1196
BiostrucAnnotSetPtr basp = NULL;
1197
BiostrucAnnotSetPtr basp2 = NULL;
1198
BiostrucFeatureSetPtr pbsfs = NULL;
1199
BiostrucFeatureSetPtr pbsfsLast = NULL;
1201
if (CddIsVASTData(mmdbid))
1202
basp = (BiostrucAnnotSetPtr) CddVASTBsAnnotSetGet(mmdbid);
1203
else if (CddIsVASTData(feature_set_id)) {
1204
basp = (BiostrucAnnotSetPtr) CddVASTBsAnnotSetGet(feature_set_id);
1205
if (basp != NULL) return basp;
1208
if (basp == NULL) return NULL;
1210
pbsfs = basp->features;
1214
if (pbsfs->id == feature_set_id) {
1215
basp2 = BiostrucAnnotSetNew();
1216
basp2->id = basp->id;
1217
basp->id = NULL; /* unlink the id valnode from basp object */
1218
basp2->descr = basp->descr;
1219
basp->descr = NULL; /* unlink the descr from basp object */
1220
basp2->features = pbsfs;
1221
if (pbsfsLast) /* relink next to prev */
1222
pbsfsLast->next = pbsfs->next;
1223
else basp->features = pbsfs->next;
1224
basp2->features->next = NULL;
1225
BiostrucAnnotSetFree(basp);
1229
pbsfs = pbsfs->next;
1231
BiostrucAnnotSetFree(basp);
1235
/*---------------------------------------------------------------------------*/
1236
/*---------------------------------------------------------------------------*/
1237
/* stolen from vastsrv.c */
1238
/*---------------------------------------------------------------------------*/
1239
/*---------------------------------------------------------------------------*/
1240
static BiostrucAnnotSetPtr CddBiostrucAnnotSetGetByFid (BiostrucAnnotSetPtr basp, Int4 feature_id, Int4 feature_set_id)
1242
BiostrucAnnotSetPtr basp2 = NULL;
1243
BiostrucFeatureSetPtr pbsfs = NULL;
1244
BiostrucFeaturePtr pbsf = NULL;
1246
if (basp == NULL) return NULL;
1248
pbsfs = basp->features;
1250
if (pbsfs->id == feature_set_id) {
1251
pbsf = pbsfs->features;
1253
if (pbsf->id == feature_id) { /* found it */
1254
basp2 = BiostrucAnnotSetNew();
1255
basp2->id = basp->id;
1256
basp->id = NULL; /* unlink the id valnode from basp object */
1257
basp2->descr = basp->descr;
1258
basp->descr = NULL; /* unlink the descr from basp object */
1259
basp2->features = BiostrucFeatureSetNew();
1260
basp2->features->id = pbsfs->id;
1261
basp2->features->descr = pbsfs->descr;
1262
pbsfs->descr = NULL; /* unlink the feature-set descr from basp object */
1263
basp2->features->features = BiostrucFeatureNew();
1264
basp2->features->features->id = pbsf->id;
1265
basp2->features->features->name = StringSave(pbsf->name);
1266
basp2->features->features->type = pbsf->type;
1267
basp2->features->features->Property_property = pbsf->Property_property;
1268
pbsf->Property_property = NULL; /* unlink the property from basp object */
1269
basp2->features->features->Location_location = pbsf->Location_location;
1270
pbsf->Location_location = NULL; /* unlink the location from basp object */
1271
BiostrucAnnotSetFree(basp);
1277
pbsfs = pbsfs->next;
1279
BiostrucAnnotSetFree(basp);
1283
/*---------------------------------------------------------------------------*/
1284
/*---------------------------------------------------------------------------*/
1285
/* Get the BiostrucAnnotSets from the VAST neighbor data */
1286
/*---------------------------------------------------------------------------*/
1287
/*---------------------------------------------------------------------------*/
1288
static void CddLoadBSAnnotSets(CddSumPtr pcds, Int4 *nPdb, Int2 *iSeqStrMode,
1289
SeqAlignPtr salp, BiostrucAlignPtr pbsaStruct)
1292
BiostrucAnnotSetPtr pbsa = NULL;
1293
Int4 iMMDBid, iDomain, iMmid, iSmid;
1294
Boolean bShrunk = FALSE;
1295
Boolean bRemove = FALSE;
1296
BiostrucAnnotSetPtr pbsaShort = NULL;
1297
BiostrucFeaturePtr pbsf;
1298
BiostrucPtr pbsSlaveHead = NULL;
1299
BiostrucPtr pbsSlaveTail, pbsSlave;
1301
SeqAlignPtr salpCopy, salpHead;
1302
SeqIdPtr mastersip, pdbsip, slavesip, sip;
1305
ValNodePtr pvnAlignment;
1306
ChemGraphAlignmentPtr cgap;
1307
ChemGraphPntrsPtr cgpp;
1308
ResidueIntervalPntrPtr mrip, srip, masterrip, mtailrip, stailrip, slaverip;
1314
if (pcdsThis->bIsPdb && !pcdsThis->bIsMaster) {
1315
pbsa = (BiostrucAnnotSetPtr) CddLocalGetFeatureSet(pcds->iMMDBId,pcdsThis->iFsid);
1316
pcdsThis->pbsaShort = CddBiostrucAnnotSetGetByFid(pbsa, pcdsThis->iFid, pcdsThis->iFsid);
1317
if (pcdsThis->pbsaShort) {
1318
if (pbsaShort == NULL) {
1319
pbsaShort = pcdsThis->pbsaShort;
1320
pbsf = pbsaShort->features->features;
1323
pbsf->next = pcdsThis->pbsaShort->features->features;
1329
pcdsThis->bIsPdb = FALSE;
1331
/*---------------------------------------------------------------------------*/
1332
/* if a particular slave is not a VAST neighbor, the slave structure must be */
1333
/* removed - otherwise Cn3D will barf */
1334
/*---------------------------------------------------------------------------*/
1335
iMMDBid = pcdsThis->iMMDBId;
1336
pbsSlaveHead = pbsaStruct->slaves;
1337
pbsSlave = pbsSlaveHead;
1338
pbsSlaveTail = NULL;
1339
while (pbsSlaveHead) {
1341
pbsi=pbsSlaveHead->id;
1343
if (pbsi->choice == BiostrucId_mmdb_id) {
1344
if (pbsi->data.intvalue == iMMDBid) {
1346
if (!pbsSlaveTail) {
1347
pbsSlave = pbsSlaveHead->next;
1348
pbsSlaveTail = pbsSlave;
1350
pbsSlaveTail->next = pbsSlaveHead->next;
1351
pbsSlaveTail = pbsSlaveTail->next;
1358
if (!bRemove) pbsSlaveTail = pbsSlaveHead;
1359
pbsSlaveHead = pbsSlaveHead->next;
1361
pbsaStruct->slaves = pbsSlave;
1364
pcdsThis = pcdsThis->next;
1366
/*---------------------------------------------------------------------------*/
1367
/* need to reorder the SeqAligns if some PDB-derived seqs. are not VAST ngb. */
1368
/*---------------------------------------------------------------------------*/
1371
salpCopy = CddAlignSort(salp,pcds);
1375
if (pbsaShort) pbsaStruct->alignments = pbsaShort;
1377
if (*iSeqStrMode == SEVSTRUC) *iSeqStrMode = ONESTRUC;
1378
if (*iSeqStrMode == CDDSEVSTRUC) *iSeqStrMode = CDDONESTRUC;
1380
/*---------------------------------------------------------------------------*/
1381
/* if required, change structure alignment to reflect SMART sequence align- */
1382
/* ment, i.e. "fix coloring" in the Cn3D display */
1383
/*---------------------------------------------------------------------------*/
1384
if (pbsaStruct->alignments) {
1385
pbsf = pbsaStruct->alignments->features->features;
1387
pcPDB=StringSave(PDBNAME_DEFAULT);
1388
iDomain = 0; cChain = '-';
1389
pcPDB[0] = pbsf->name[0]; pcPDB[1] = pbsf->name[1];
1390
pcPDB[2] = pbsf->name[2]; pcPDB[3] = pbsf->name[3];
1391
cChain = pbsf->name[4];
1392
iDomain = atoi ((char *) &pbsf->name[5]);
1393
mastersip = MakePDBSeqId2(pcPDB,cChain,iDomain,FALSE);
1394
pdbsip = MakePDBSeqId2(pcPDB,cChain,iDomain,FALSE);
1395
pcPDB[0] = pbsf->name[7]; pcPDB[1] = pbsf->name[8];
1396
pcPDB[2] = pbsf->name[9]; pcPDB[3] = pbsf->name[10];
1397
cChain = pbsf->name[11];
1398
iDomain = atoi ((char *) &pbsf->name[12]);
1399
slavesip = MakePDBSeqId2(pcPDB,cChain,iDomain,FALSE);
1401
pvnAlignment = ValNodeFindNext(pbsf->Location_location,NULL,Location_location_alignment);
1402
cgap = (ChemGraphAlignmentPtr) pvnAlignment->data.ptrvalue;
1403
cgpp = (ChemGraphPntrsPtr) cgap->alignment->data.ptrvalue;
1404
masterrip = (ResidueIntervalPntrPtr) cgpp->data.ptrvalue;
1405
iMmid = masterrip->molecule_id;
1406
cgpp = (ChemGraphPntrsPtr) cgap->alignment->next->data.ptrvalue;
1407
slaverip = (ResidueIntervalPntrPtr) cgpp->data.ptrvalue;
1408
iSmid = slaverip->molecule_id;
1409
/*---------------------------------------------------------------------------*/
1410
/* now identify the CDD alignment corresponding to the current structure ali.*/
1411
/*---------------------------------------------------------------------------*/
1412
salpHead = (SeqAlignPtr) salp;
1414
ddp = (DenseDiagPtr) salpHead->segs;
1416
if (CddSameSip(sip,mastersip)) {
1417
sip = sip->next; if (CddSameSip(sip,slavesip)) {
1418
/*---------------------------------------------------------------------------*/
1419
/* if the alignment is found, start building up a new set of residue interv. */
1420
/*---------------------------------------------------------------------------*/
1421
masterrip = NULL; slaverip = NULL;
1423
mrip = ResidueIntervalPntrNew();
1424
srip = ResidueIntervalPntrNew();
1425
mrip->from = ddp->starts[0]+1;
1426
srip->from = ddp->starts[1]+1;
1427
mrip->to = ddp->starts[0]+ddp->len;
1428
srip->to = ddp->starts[1]+ddp->len;
1429
mrip->molecule_id = iMmid;
1430
srip->molecule_id = iSmid;
1434
mtailrip = masterrip;
1435
stailrip = slaverip;
1437
mtailrip->next = mrip;
1438
stailrip->next = srip;
1439
mtailrip = mtailrip->next;
1440
stailrip = stailrip->next;
1446
salpHead = salpHead->next;
1448
cgpp = (ChemGraphPntrsPtr) cgap->alignment->data.ptrvalue;
1449
cgpp->data.ptrvalue = masterrip;
1450
cgpp = (ChemGraphPntrsPtr) cgap->alignment->next->data.ptrvalue;
1451
cgpp->data.ptrvalue = slaverip;
1458
/*---------------------------------------------------------------------------*/
1459
/*---------------------------------------------------------------------------*/
1460
/* allocate a new CddDesc linked list entry */
1461
/*---------------------------------------------------------------------------*/
1462
/*---------------------------------------------------------------------------*/
1463
static CddDescPtr CddDescNew()
1467
pcdd = (CddDescPtr)MemNew(sizeof(CddDesc));
1468
if (pcdd == NULL) return pcdd;
1469
pcdd->cCddId[0] = '\0';
1470
pcdd->cDescr[0] = '\0';
1471
pcdd->cSourc[0] = '\0';
1476
/*---------------------------------------------------------------------------*/
1477
/*---------------------------------------------------------------------------*/
1478
/* Free a CddDesc linked list */
1479
/*---------------------------------------------------------------------------*/
1480
/*---------------------------------------------------------------------------*/
1481
static CddDescPtr CddDescFree(CddDescPtr pcdd)
1493
/*---------------------------------------------------------------------------*/
1494
/*---------------------------------------------------------------------------*/
1495
/* adds a to a linked list of CddDescPtr, always returns the beginning of the*/
1496
/* list and always adds to the end of the list!! */
1497
/*---------------------------------------------------------------------------*/
1498
/*---------------------------------------------------------------------------*/
1499
static CddDescPtr CddDescLink(CddDescPtr PNTR head, CddDescPtr newnode)
1503
if (head == NULL) return newnode;
1506
while(pcdd->next != NULL) pcdd = pcdd->next;
1507
pcdd->next = newnode;
1508
} else *head = newnode;
1513
/*---------------------------------------------------------------------------*/
1514
/*---------------------------------------------------------------------------*/
1515
/* Read the table of Cdd Names and descriptions */
1516
/*---------------------------------------------------------------------------*/
1517
/*---------------------------------------------------------------------------*/
1518
static CddDescPtr CddReadDescr() {
1520
Char pcBuf[CDD_MAX_DESCR];
1522
CddDescPtr pcdd = NULL;
1523
CddDescPtr pcddThis;
1525
if (!(fp = FileOpen(CDDdescr, "r")))
1526
CddSevError("Can not read description file!");
1529
pcTest = fgets(pcBuf, (size_t)CDD_MAX_DESCR, fp);
1530
if (pcTest) if (pcTest[0] != ' ') {
1531
pcddThis = CddDescNew();
1532
strcpy(pcddThis->cCddId,strtok(pcTest,"\t"));
1533
strcpy(pcddThis->cDescr,strtok(NULL,"\t"));
1534
strcpy(pcddThis->cSourc,strtok(NULL,"\t"));
1535
CddDescLink(&(pcdd),pcddThis);
1542
/*---------------------------------------------------------------------------*/
1543
/*---------------------------------------------------------------------------*/
1544
/* return a pointer to the master sequence id */
1545
/*---------------------------------------------------------------------------*/
1546
/*---------------------------------------------------------------------------*/
1547
static SeqIdPtr CddDetermineMasterSip(CddPtr pcdd)
1554
if (!pcdd) return NULL;
1555
psa = (SeqAnnotPtr) pcdd->seqannot;
1556
if (!psa) return NULL;
1557
salp = (SeqAlignPtr) psa->data;
1558
if (!salp) return NULL;
1560
if (!ddp) return NULL;
1565
/*---------------------------------------------------------------------------*/
1566
/*---------------------------------------------------------------------------*/
1567
/* local version of tax1_join */
1568
/*---------------------------------------------------------------------------*/
1569
/*---------------------------------------------------------------------------*/
1570
static Int4 Cdd_tax1_join(Int4 taxid1, Int4 taxid2)
1572
TXC_TreeNodePtr *txtnpp1;
1573
TXC_TreeNodePtr *txtnpp2;
1574
Int4 inLineage1, inLineage2;
1579
if (taxid1 == taxid2) return (taxid1);
1580
if (taxid1 <= 1 || taxid2 <=1) return(1);
1581
txtnpp1 = (TXC_TreeNodePtr *)txc_getLineage(taxid1,&inLineage1);
1582
txtnpp2 = (TXC_TreeNodePtr *)txc_getLineage(taxid2,&inLineage2);
1583
if (txtnpp1 && txtnpp2 && inLineage1 > 0 && inLineage2 > 0) {
1584
for (i=0;i<inLineage1;i++) {
1585
for (j=0;j<inLineage2;j++) {
1586
if (txtnpp1[i]->tax_id == txtnpp2[j]->tax_id) {
1587
retid = txtnpp1[i]->tax_id;
1591
if (retid != -1) break;
1596
for (i=0;i<inLineage1;i++) MemFree(txtnpp1[i]);
1598
for (i=0;i<inLineage2;i++) MemFree(txtnpp2[i]);
1601
if (retid != -1) return (retid);
1605
/*---------------------------------------------------------------------------*/
1606
/*---------------------------------------------------------------------------*/
1607
/* put taxonomy root node in the CD */
1608
/*---------------------------------------------------------------------------*/
1609
/*---------------------------------------------------------------------------*/
1611
static void CddAddTax(CddPtr pcdd)
1624
ValNodePtr descr, vnpTail = NULL, vnp;
1625
Int4 iTxid1 = -1, iTxid2, gi;
1627
descr = pcdd->description; /* assumes that Tax-node is not 1st description*/
1628
while (descr) { /* and that there's no 2 consecutive Tax-nodes */
1629
if (descr->choice == CddDescr_tax_source) {
1630
printf("Updating existing taxonomy assignments...\n");
1631
if (vnpTail) vnpTail->next = descr->next;
1634
descr = descr->next;
1637
if (!TaxArchInit()) CddSevError("Can't initialize taxonomy services\n");
1638
bssp = (BioseqSetPtr) pcdd->sequences->data.ptrvalue;
1639
sep = bssp->seq_set;
1641
if (sep->choice != 1) {
1642
printf("Warning: Error in Seq-entry!\n");
1644
bsp = sep->data.ptrvalue;
1647
if (descr->choice == Seq_descr_source) {
1648
bsop = descr->data.ptrvalue;
1649
pOrgRef = bsop->org;
1651
dbtp = pOrgRef->db->data.ptrvalue;
1656
gi = GetGIForSeqId (sip);
1657
iTxid2 = tax1_getTaxId4GI(gi);
1660
descr = descr->next;
1666
iTxid1 = Cdd_tax1_join(iTxid2, iTxid1);
1673
t1dp = (Taxon1DataPtr) tax1_getbyid(iTxid1);
1674
pOrgRef = t1dp->org;
1675
if (pOrgRef) CddAssignDescr(pcdd,pOrgRef,CddDescr_tax_source,0);
1678
if (!TaxArchFini()) CddSevError("Can't disconnect from taxonomy services\n");
1681
/*---------------------------------------------------------------------------*/
1682
/*---------------------------------------------------------------------------*/
1683
/*---------------------------------------------------------------------------*/
1684
/*---------------------------------------------------------------------------*/
1685
static ValNodePtr SeqDescCopy(ValNodePtr vnp)
1689
vnpNew = ValNodeNew(NULL);
1690
vnpNew->choice = vnp->choice;
1691
vnpNew->data.ptrvalue = strdup(vnp->data.ptrvalue);
1695
/*---------------------------------------------------------------------------*/
1696
/*---------------------------------------------------------------------------*/
1697
/* Main Function; name of the CD to be dumped is a command-line parameter */
1698
/*---------------------------------------------------------------------------*/
1699
/*---------------------------------------------------------------------------*/
1702
BioseqPtr bspTrunc, bspCons, bspFake;
1705
BiostrucAlignPtr pbsaStruct = NULL;
1706
BiostrucAlignSeqPtr pbsaSeq;
1707
BiostrucSeqsPtr pbsaStrSeq;
1708
CddDescPtr pcddesc = NULL, pcddeschead = NULL;
1709
CddExpAlignPtr pCDea, pCDea1, pCDeaR;
1711
CddSumPtr pcds = NULL;
1713
CharPtr cCategory, pcTest;
1714
CharPtr cDescStr, cDescNew;
1719
SeqAlignPtr salpHead, salpNew, salpCons, salpCopy, salpTail;
1720
SeqAlignPtr salpThis;
1721
SeqAnnotPtr psaCAlignHead;
1723
SeqEntryPtr oldhead, newhead, septemp, oldtail, newtail, remaind;
1724
SeqIdPtr sip_master, sipTrunc;
1726
TrianglePtr pTri = NULL;
1727
ValNodePtr pvnId, pub, vnp, desc = NULL;
1728
Int4 muid, nPdb = 0, i;
1729
Int2 iSeqStrMode = CDDSEVSTRUC;
1731
Char CDDref[PATH_MAX];
1732
Char cLink[23] = "linked to 3D-structure";
1734
Char cOutFile[PATH_MAX];
1737
/*---------------------------------------------------------------------------*/
1738
/* retrieve command-line parameters */
1739
/*---------------------------------------------------------------------------*/
1740
if (! GetArgs ("cddump", NUMARGS, myargs)) return (1);
1742
/*---------------------------------------------------------------------------*/
1743
/* retrieve names for directories etc. */
1744
/*---------------------------------------------------------------------------*/
1745
if (!CddGetParams()) CddSevError("Couldn't read from config file...");
1747
/*---------------------------------------------------------------------------*/
1748
/* initialize ID1/Entrez interface - this is needed to retrieve sequences */
1749
/*---------------------------------------------------------------------------*/
1750
if (!ID1BioseqFetchEnable("cddump",TRUE))
1751
CddSevError("Unable to initialize ID1");
1752
if (!MMDBInit()) CddHtmlError("MMDB Initialization failed");
1753
if ( !EntrezInit("Cddump", FALSE, &is_network))
1754
CddSevError("Unable to start Entrez");
1756
/*---------------------------------------------------------------------------*/
1758
/*---------------------------------------------------------------------------*/
1759
CddRegularizeFileName(myargs[0].strvalue,cCDDid,cCDDfname,myargs[1].strvalue);
1761
/*---------------------------------------------------------------------------*/
1762
/* read in an existing CD if necessary/possible */
1763
/*---------------------------------------------------------------------------*/
1764
if (myargs[12].strvalue) {
1765
if (myargs[13].intvalue > 0) {
1766
pcdd = (CddPtr) CddReadFromFile(myargs[12].strvalue, TRUE);
1767
} else pcdd = (CddPtr) CddReadFromFile(myargs[12].strvalue, FALSE);
1768
if (!pcdd) CddSevError("Could not read CD from file!");
1769
iSeqStrMode = CDDUPDATE;
1770
if (CddHasConsensus(pcdd)) CddSevError("Unable to process: CD has consensus");
1771
psaCAlignHead = (SeqAnnotPtr) pcdd->seqannot;
1772
salpHead = (SeqAlignPtr) pcdd->seqannot->data;
1773
bssp = (BioseqSetPtr) pcdd->sequences->data.ptrvalue;
1774
pbsaSeq = BiostrucAlignSeqNew();
1775
pbsaSeq->sequences = bssp->seq_set;
1776
if (myargs[14].strvalue) {
1777
strcpy(cCDDid, myargs[14].strvalue);
1781
/*---------------------------------------------------------------------------*/
1782
/* Initialize the data structures needed to collect alignments and sequences */
1783
/*---------------------------------------------------------------------------*/
1784
if (iSeqStrMode != CDDUPDATE) {
1785
pbsaSeq = BiostrucAlignSeqNew();
1787
/*---------------------------------------------------------------------------*/
1788
/* read in the ASN.1-formatted sequence alignment corresponding to this CD */
1789
/*---------------------------------------------------------------------------*/
1790
if (!myargs[11].strvalue) {
1791
salpHead = (SeqAlignPtr) CddReadSeqAln();
1793
salpHead = (SeqAlignPtr) CddReadRealInd(myargs[11].strvalue);
1796
/*---------------------------------------------------------------------------*/
1797
/* process the Seqalign */
1798
/*---------------------------------------------------------------------------*/
1799
pcds = (CddSumPtr) CddProcessSeqAln(&salpHead,&nPdb,pbsaSeq,
1800
myargs[19].intvalue);
1802
salpHead = CddAlignSort(salpHead,pcds);
1803
pcds = CddSumSort(pcds);
1804
} else if (nPdb == 1) {
1805
iSeqStrMode = CDDONESTRUC;
1807
iSeqStrMode = CDDSEQUONLY;
1810
/*---------------------------------------------------------------------------*/
1811
/* additional pointers to list of sequences */
1812
/*---------------------------------------------------------------------------*/
1814
pbsaStrSeq = BiostrucSeqsNew();
1815
pbsaStrSeq->sequences = pbsaSeq->sequences;
1816
} else if (nPdb > 1) {
1817
pbsaStruct = BiostrucAlignNew();
1818
pbsaStruct->sequences = pbsaSeq->sequences;
1821
/*---------------------------------------------------------------------------*/
1822
/* if more than one structure present, VAST results have to be retrieved */
1823
/*---------------------------------------------------------------------------*/
1828
OpenMMDBAPI((POWER_VIEW /* ^ FETCH_ENTREZ */), NULL);
1829
/*---------------------------------------------------------------------------*/
1830
/* read the information contained in the VAST info file. This is used to */
1831
/* figure out which biostruc annot sets have to be retrieved from the VAST */
1833
/*---------------------------------------------------------------------------*/
1834
CddReadVASTInfo(pcds);
1836
/*---------------------------------------------------------------------------*/
1837
/* Now identify the feature set id's that go with each structurally aligned */
1838
/* slave - these need not be the same */
1839
/*---------------------------------------------------------------------------*/
1840
CddIdentifyFsids(pcds);
1842
/*---------------------------------------------------------------------------*/
1843
/* load the biostruc annot sets (and trim them) for each aligned slave struc.*/
1844
/*---------------------------------------------------------------------------*/
1845
CddLoadBSAnnotSets(pcds, &nPdb, &iSeqStrMode, salpHead, pbsaStruct);
1851
/*---------------------------------------------------------------------------*/
1852
/* Disable ID1 BioseqAccess */
1853
/*---------------------------------------------------------------------------*/
1854
ID1BioseqFetchDisable();
1857
/*---------------------------------------------------------------------------*/
1858
/*---------------------------------------------------------------------------*/
1859
/* allocate the CDD data structure */
1860
/*---------------------------------------------------------------------------*/
1861
/*---------------------------------------------------------------------------*/
1862
pcdd = (CddPtr) CddNew();
1864
/*---------------------------------------------------------------------------*/
1865
/* add the alignment data to the new CD */
1866
/*---------------------------------------------------------------------------*/
1867
psaCAlignHead = SeqAnnotNew();
1868
psaCAlignHead->type = 2;
1869
psaCAlignHead->data = salpHead;
1870
pcdd->seqannot = (SeqAnnotPtr) psaCAlignHead;
1872
/*---------------------------------------------------------------------------*/
1873
/* Assign pointer to bioseqs */
1874
/*---------------------------------------------------------------------------*/
1875
bssp = BioseqSetNew();
1876
bssp->seq_set = pbsaSeq->sequences;
1877
pcdd->sequences = ValNodeNew(NULL);
1878
pcdd->sequences->choice = 2;
1879
pcdd->sequences->data.ptrvalue = bssp;
1881
/*---------------------------------------------------------------------------*/
1882
/* Assign pointer to BiostrucFeatureSet (holding the VAST alignments) */
1883
/*---------------------------------------------------------------------------*/
1885
pcdd->features = (BiostrucAnnotSetPtr) pbsaStruct->alignments;
1888
/*---------------------------------------------------------------------------*/
1889
/* fill in as much of the descriptive information as available */
1890
/* use a set of generic calls to CddAssignDescr */
1891
/*---------------------------------------------------------------------------*/
1892
pcddesc = CddReadDescr();
1893
pcddeschead = pcddesc;
1895
if (strcmp(cCDDid,pcddesc->cCddId)==0) {
1896
CddAssignDescr(pcdd,(CharPtr) strdup(pcddesc->cDescr),CddDescr_comment,0);
1897
if (strncmp(pcddesc->cSourc," ",1)!=0) {
1898
cCategory = StringSave(pcddesc->cSourc);
1899
cCategory[strlen(cCategory)-1]='\0';
1900
CddAssignDescr(pcdd, (CharPtr) strdup(cCategory),CddDescr_category,0);
1904
pcddesc = pcddesc->next;
1907
/*---------------------------------------------------------------------------*/
1908
/* note whether CD has link to 3D-Structure */
1909
/*---------------------------------------------------------------------------*/
1911
CddAssignDescr(pcdd, strdup(cLink), CddDescr_comment,0);
1914
CddAssignDescr(pcdd, NULL, CddDescr_status , myargs[8].intvalue);
1916
/*---------------------------------------------------------------------------*/
1917
/* assign the source identifier if present */
1918
/*---------------------------------------------------------------------------*/
1919
if (NULL == myargs[4].strvalue) {
1920
if (strncmp(myargs[0].strvalue,"pfam",4) == 0) {
1921
myargs[4].strvalue = strdup("Pfam");
1922
} else if (strncmp(myargs[0].strvalue,"LOAD",4) == 0) {
1923
myargs[4].strvalue = strdup("Load");
1925
myargs[4].strvalue = strdup("Smart");
1929
CddAssignDescr(pcdd,strdup(myargs[4].strvalue),CddDescr_source,0);
1931
/*---------------------------------------------------------------------------*/
1932
/* assign create date as the current date */
1933
/*---------------------------------------------------------------------------*/
1934
CddAssignDescr(pcdd,(DatePtr) DateCurr(),CddDescr_create_date,0);
1936
/*---------------------------------------------------------------------------*/
1937
/* Assign references if they can be found */
1938
/*---------------------------------------------------------------------------*/
1939
strcpy(CDDref,REFpath);
1940
strcat(CDDref,cCDDid);
1942
strcat(CDDref,myargs[7].strvalue);
1943
fp = FileOpen(CDDref,"r");
1948
pcTest = fgets(pcBuf, (size_t)100, fp);
1950
pcTest[strlen(pcTest)-1]='\0';
1951
muid = (Int4) atoi(pcTest);
1952
pub = (ValNodePtr) FetchPub(muid);
1953
if (!pub) pub = (ValNodePtr) FetchPubPmId(muid);
1954
if (pub) CddAssignDescr(pcdd, pub, CddDescr_reference, 0);
1961
} /* end if iSeqStrMode != CDDUPDATE */
1963
/*---------------------------------------------------------------------------*/
1964
/* if parsed from a real.ind-file, extract the CDD id from the input file */
1965
/* name. This is a kludge to deal with the setup of "split sets" for testing */
1966
/* with RPS-Blast. The CDD-id is only chaged at this point so that the refs */
1967
/* and descriptions are parsed according to the id of the 'mother' CD */
1968
/*---------------------------------------------------------------------------*/
1969
if (myargs[15].intvalue > 0 && myargs[11].strvalue) {
1970
strncpy(cCDDid,myargs[11].strvalue,strlen(myargs[11].strvalue)-9);
1973
/*---------------------------------------------------------------------------*/
1974
/* this part includes modifications which are part of a CD update as well */
1975
/*---------------------------------------------------------------------------*/
1976
pvnId = ValNodeNew(NULL);
1977
pGid = (GlobalIdPtr) GlobalIdNew();
1978
pGid->accession = strdup(cCDDid);
1979
pvnId->choice = CddId_gid;
1980
pvnId->data.ptrvalue = (GlobalIdPtr) pGid;
1983
/*---------------------------------------------------------------------------*/
1984
/* if requested, reindex the CD-Alignment to a new master. The argument must */
1985
/* be given as a PDB-derived Sequence ID, in the form of "1ABCX" or "1ABC" */
1986
/*---------------------------------------------------------------------------*/
1987
if (myargs[17].strvalue) {
1988
if (strlen(myargs[17].strvalue) < 4) CddSevError("option -R needs a valid PDB-Id");
1989
if (strlen(myargs[17].strvalue) > 5) CddSevError("option -R needs a valid PDB-Id");
1990
pdbsip = PDBSeqIdNew();
1991
if (strlen(myargs[17].strvalue) == 5) {
1992
pdbsip->chain = (Uint1) *(myargs[17].strvalue+4);
1993
myargs[17].strvalue[4]='\0';
1995
pdbsip->mol = strdup(myargs[17].strvalue);
1996
sip_master = ValNodeNew(NULL);
1997
sip_master->choice = SEQID_PDB;
1998
sip_master->data.ptrvalue = pdbsip;
1999
if (!CddFindSip(sip_master,bssp->seq_set)) CddSevError("Could not find new master in CD");
2001
psaCAlignHead = SeqAnnotNew();
2002
psaCAlignHead->type = 2;
2003
psaCAlignHead->data = CddReindexSeqAlign(salpHead,sip_master,bssp);
2004
salpHead = psaCAlignHead->data;
2005
pcdd->seqannot = (SeqAnnotPtr) psaCAlignHead;
2006
if (NULL != pcdd->features) pcdd->features = NULL;
2007
/*---------------------------------------------------------------------------*/
2008
/* for compliance with Cn3D++, the bioseqs need to be reordered so that the */
2009
/* new master appears on top */
2010
/*---------------------------------------------------------------------------*/
2011
septemp = bssp->seq_set;
2015
if (septemp->choice == 1) {
2016
bspThis = (BioseqPtr) septemp->data.ptrvalue;
2017
if (CddSameSip(bspThis->id, sip_master)) {
2020
remaind = septemp->next;
2023
else oldtail = septemp;
2024
} else (CddSevError("Invalid SeqEntry encountered while reindexing to new master"));
2025
septemp = septemp->next;
2027
bssp->seq_set = newhead;
2028
newtail->next = oldhead;
2029
oldtail->next = remaind;
2032
/*---------------------------------------------------------------------------*/
2033
/* assuming this is a set of pairwise master-slave dendiag alignments, */
2034
/* calculate the interval on the master that is aligned */
2035
/*---------------------------------------------------------------------------*/
2036
sip_master = CddDetermineMasterSip(pcdd);
2037
CddAssignProfileRange(pcdd, sip_master);
2039
/*---------------------------------------------------------------------------*/
2040
/* create identifier and description for the truncated master */
2041
/*---------------------------------------------------------------------------*/
2042
oidp = (ObjectIdPtr) ObjectIdNew();
2043
oidp->str = strdup(cCDDid);
2046
dbtp->db = myargs[4].strvalue;
2047
vnp = pcdd->description;
2049
if (vnp->choice == CddDescr_comment) {
2050
desc = ValNodeNew(NULL);
2051
desc->choice = Seq_descr_title;
2052
desc->data.ptrvalue = StringSave((CharPtr)vnp->data.ptrvalue);
2058
/*---------------------------------------------------------------------------*/
2059
/* and create the truncated master */
2060
/*---------------------------------------------------------------------------*/
2061
sintp = (SeqIntPtr) pcdd->profile_range;
2062
sipTrunc = (SeqIdPtr) ValNodeNew(NULL);
2063
sipTrunc->choice = 11;
2064
sipTrunc->data.ptrvalue = dbtp;
2065
bspTrunc = (BioseqPtr) BioseqCopy(sipTrunc,sip_master,sintp->from,sintp->to,0,FALSE);
2066
if (desc) bspTrunc->descr = SeqDescCopy(desc);
2067
if (myargs[9].intvalue == 0) {
2068
/*---------------------------------------------------------------------------*/
2069
/* add to cdd if no consensus is calculated */
2070
/*---------------------------------------------------------------------------*/
2071
pcdd->trunc_master = (struct bioseq PNTR) bspTrunc;
2072
/*---------------------------------------------------------------------------*/
2073
/* now calculate the PSSM and write the checkpoint file and corresponding */
2074
/* sequence out to disk */
2075
/*---------------------------------------------------------------------------*/
2076
salpCopy = (SeqAlignPtr) CddCopyMSLDenDiag(pcdd->seqannot->data);
2077
CddReindexMSLDenDiagMaster(salpCopy, sintp->from);
2078
bspFake = (BioseqPtr) BioseqCopy(NULL,sip_master,sintp->from,sintp->to,0,FALSE);
2079
CddDenDiagCposComp2(bspFake,myargs[10].intvalue,salpCopy,pcdd,bspTrunc,0.5,1.0,myargs[16].strvalue);
2081
/*---------------------------------------------------------------------------*/
2082
/* calculate consensus sequence using the 50/50 algorithm */
2083
/*---------------------------------------------------------------------------*/
2084
salpNew = (SeqAlignPtr) CddConsensus(salpHead,bssp->seq_set,
2085
bspTrunc,pcdd->profile_range,
2086
&bspCons, &salpCons);
2087
if (desc) bspCons->descr = SeqDescCopy(desc);
2088
sepNew = SeqEntryNew();
2089
sepNew->data.ptrvalue = bspCons;
2091
ValNodeLink(&(pbsaSeq->sequences), sepNew);
2093
sintp->to = bspCons->length - 1;
2094
sintp->id = SeqIdDup(bspCons->id);
2095
sip_master = sintp->id;
2096
BioseqFree(bspTrunc);
2097
bspTrunc = (BioseqPtr) BioseqCopy(sipTrunc,sip_master,sintp->from,sintp->to,0,FALSE);
2098
if (desc) bspTrunc->descr = SeqDescCopy(desc);
2099
bspFake = (BioseqPtr) BioseqCopy(NULL,sip_master,sintp->from,sintp->to,0,FALSE);
2100
CddDenDiagCposComp2(bspFake,myargs[10].intvalue,salpNew,pcdd,bspTrunc,0.5,1.0,myargs[16].strvalue);
2101
pcdd->trunc_master = (struct bioseq PNTR) bspTrunc;
2102
salpNew = SeqAlignSetFree(salpNew);
2103
/*---------------------------------------------------------------------------*/
2104
/* Now reindex the seqalign to make the consensus the new master */
2105
/*---------------------------------------------------------------------------*/
2106
pCDea1 = (CddExpAlignPtr) SeqAlignToCddExpAlign(salpCons,bssp->seq_set);
2107
pCDeaR = (CddExpAlignPtr) InvertCddExpAlign(pCDea1, bssp->seq_set);
2108
salpNew = (SeqAlignPtr) CddExpAlignToSeqAlign(pCDeaR,NULL);
2109
pCDeaR = CddExpAlignFree(pCDeaR);
2111
salpCopy = salpHead;
2112
salpHead = salpCopy;
2116
pCDea = (CddExpAlignPtr) SeqAlignToCddExpAlign(salpHead, bssp->seq_set);
2117
pCDeaR = (CddExpAlignPtr) CddReindexExpAlign(pCDea1,bspCons->length,pCDea,0,i);
2118
salpThis = (SeqAlignPtr) CddExpAlignToSeqAlign(pCDeaR,NULL);
2119
pCDeaR = CddExpAlignFree(pCDeaR);
2120
pCDea = CddExpAlignFree(pCDea);
2121
salpTail->next = salpThis;
2122
salpTail = salpThis;
2123
salpHead = salpHead->next;
2125
psaCAlignHead->data = salpNew;
2126
pCDea1 = CddExpAlignFree(pCDea1);
2127
salpCopy = SeqAlignSetFree(salpCopy);
2130
/*---------------------------------------------------------------------------*/
2131
/* if the CD needs to be written to disk, fill in the missing data items */
2132
/*---------------------------------------------------------------------------*/
2133
if (myargs[18].intvalue == 0) {
2136
/*---------------------------------------------------------------------------*/
2137
/* do the following steps only if the CD-status is not 2, pending release */
2138
/*---------------------------------------------------------------------------*/
2139
if (myargs[8].intvalue != 2) {
2141
/*---------------------------------------------------------------------------*/
2142
/* Calculate pairwise percent identities between the sequences, for alignment*/
2143
/* formatting purposes */
2144
/*---------------------------------------------------------------------------*/
2145
pTri = (TrianglePtr) CddCalculateTriangle(pcdd);
2146
if (pTri) pcdd->distance = pTri;
2148
pcdd->posfreq = NULL;
2149
pcdd->scoremat = NULL;
2152
/*---------------------------------------------------------------------------*/
2153
/* create/update the CD taxonomy data item */
2154
/*---------------------------------------------------------------------------*/
2157
/*---------------------------------------------------------------------------*/
2158
/* check if CD is a proper CD, and trim the seqaligns if necessary */
2159
/*---------------------------------------------------------------------------*/
2160
if (myargs[8].intvalue !=3 && myargs[20].intvalue > 0) {
2161
if (CddTrimSeqAligns(pcdd)) CddSevError("Crashed while trimming seqaligns!");
2164
/*---------------------------------------------------------------------------*/
2165
/* for output, convert the pairwise master-slave densediag to a multiple */
2166
/* dense-diag alignment if the block-structure is consistent - and if */
2167
/* specified in the command line */
2168
/*---------------------------------------------------------------------------*/
2169
if (myargs[5].intvalue > 0) {
2170
pcdd->seqannot->data = (SeqAlignPtr)CddMSLDenDiagToMULDenDiag(pcdd->seqannot->data);
2173
/*---------------------------------------------------------------------------*/
2174
/* assign a CD-name automatically if not yet present in the CD */
2175
/*---------------------------------------------------------------------------*/
2177
pcdd->name = strdup(cCDDid);
2179
if (myargs[22].intvalue) {
2180
if (Nlm_StrNCmp(pcdd->name,"pfam0",5) == 0 ||
2181
Nlm_StrNCmp(pcdd->name,"smart0",6) == 0 ||
2182
Nlm_StrNCmp(pcdd->name,"LOAD_",5) == 0) {
2183
vnp = pcdd->description;
2185
if (vnp->choice == CddDescr_comment) {
2186
if (Nlm_StrCmp(vnp->data.ptrvalue,"linked to 3D-structure") != 0) {
2187
cDescStr = StringSave(vnp->data.ptrvalue);
2193
cDescNew = Nlm_StrStr(cDescStr,",");
2195
if (Nlm_StrLen(cDescNew) > 2) {
2196
vnp->data.ptrvalue = cDescNew + 2;
2197
Nlm_StrTok(cDescStr,",");
2198
pcdd->name = cDescStr;
2204
/*---------------------------------------------------------------------------*/
2205
/* Write the finished CD out to disk */
2206
/*---------------------------------------------------------------------------*/
2207
strcpy(cOutFile,cCDDid);
2208
strcat(cOutFile,".");
2209
strcat(cOutFile,myargs[1].strvalue);
2211
if (!CddWriteToFile(pcdd,cOutFile,(Boolean) myargs[2].intvalue))
2212
CddSevError("Could not write ASN.1 output / CDD to file!");
2214
/*---------------------------------------------------------------------------*/
2215
/* output the corresponding Cdd tree-file */
2216
/*---------------------------------------------------------------------------*/
2217
strcpy(cOutFile,cCDDid);
2218
strcat(cOutFile,".");
2219
strcat(cOutFile,myargs[6].strvalue);
2221
pcddt = CddTreeNew();
2222
pcddt->name = pcdd->name;
2223
pcddt->id = pcdd->id;
2224
pcddt->description = pcdd->description;
2226
if (!CddTreeWriteToFile(pcddt,cOutFile,(Boolean) myargs[2].intvalue))
2227
CddSevError("Could not write CDD-tree");
2230
/*---------------------------------------------------------------------------*/
2232
/*---------------------------------------------------------------------------*/
2233
/* pcdd = (CddPtr) CddFreeCarefully(pcdd); */
2234
pcddeschead = CddDescFree(pcddeschead);
2235
pcds = CddSumFree(pcds);