3
* ===========================================================================
6
* National Center for Biotechnology Information (NCBI)
8
* This software/database is a "United States Government Work" under the
9
* terms of the United States Copyright Act. It was written as part of
10
* the author's official duties as a United States Government employee and
11
* thus cannot be copyrighted. This software/database is freely available
12
* to the public for use. The National Library of Medicine and the U.S.
13
* Government do not place any restriction on its use or reproduction.
14
* We would, however, appreciate having the NCBI and the author cited in
15
* any work or product based on this material
17
* Although all reasonable efforts have been taken to ensure the accuracy
18
* and reliability of the software and data, the NLM and the U.S.
19
* Government do not and cannot warrant the performance or results that
20
* may be obtained by using this software or data. The NLM and the U.S.
21
* Government disclaim all warranties, express or implied, including
22
* warranties of performance, merchantability or fitness for any particular
25
* ===========================================================================
27
* File Name: vastchkid.c
31
* Version Creation Date: 6/16/98
33
* $Log: vastchkid.c,v $
34
* Revision 1.1.1.1 2002/12/06 20:17:21 chenj
37
* Revision 6.7 2000/05/26 22:31:40 ywang
38
* assign local for various types of seq features
40
* Revision 6.6 1999/07/21 22:33:43 ywang
41
* assign local id to seq-annot
43
* Revision 6.5 1998/12/22 18:01:52 addess
44
* changes relevant to reading new type of annot-set data
46
* Revision 6.4 1998/11/27 16:54:05 addess
47
* made change to AssignLocaIdToSeqAlignMaster as suggested by Hugues
49
* Revision 6.3 1998/10/21 15:42:16 ywang
50
* to simplify extraction of vast alignment clique number
52
* Revision 6.2 1998/10/14 17:21:42 addess
53
* for sending aligned chains from vastsearch
55
* Revision 6.1 1998/07/17 18:42:33 madej
56
* Handles local sequence IDs.
69
#include <sys/resource.h>
75
#include "vast2mage.h"
82
/* File-scope buffer holding the leading characters of a feature name.
 * Within this file it is filled by ScreenSequence() and CheckId() with the
 * first 6 characters of the first feature's name, compared against by
 * FindIdSlave(), and handed to GetObjectId() by CheckId().
 * Being shared mutable state, these functions are coupled through it. */
Char pSegmentMaster[Local_max];
83
/*--------------- find PDBSeqId ------------------*/
84
/* GetPdbSeqId: scan a SeqId chain for an entry whose choice is 15 and pick
 * up its data.ptrvalue as a PDBSeqIdPtr.
 *
 *   sip  head of the SeqId chain to search.
 *
 * NOTE(review): the statement that points seq_id at sip and the return
 * statement are not visible in this excerpt; presumably the function
 * returns pdb_seq_id, i.e. NULL when no matching entry exists -- confirm.
 * NOTE(review): 15 is presumably the PDB SeqId choice value; a named
 * constant would be clearer -- confirm against the toolkit headers.
 * The loop does not stop at the first hit, so if several entries have
 * choice 15 the last one visited is the one kept. */
PDBSeqIdPtr GetPdbSeqId(SeqIdPtr sip)
85
/* may need to be modified depending on how the bioseq id is represented */
87
SeqIdPtr seq_id = NULL;
88
PDBSeqIdPtr pdb_seq_id = NULL;
92
while(seq_id != NULL){
93
if(seq_id->choice == 15){
94
pdb_seq_id = seq_id->data.ptrvalue;
97
seq_id = seq_id->next;
102
/***************************************************************************/
103
/* GetLocalId() Ken */
104
/***************************************************************************/
105
/* GetLocalId: scan a SeqId chain for an entry whose choice is 1 and pick up
 * its data.ptrvalue as an ObjectIdPtr.
 *
 *   sip  head of the SeqId chain to search.
 *
 * NOTE(review): the statement that points seq_id at sip and the return
 * statement are not visible in this excerpt; presumably the function
 * returns oip, i.e. NULL when no matching entry exists -- confirm.
 * NOTE(review): 1 is presumably the same value as SEQID_LOCAL, which this
 * file uses by name elsewhere -- confirm and prefer the named constant. */
ObjectIdPtr GetLocalId(SeqIdPtr sip)
107
ObjectIdPtr oip = NULL;
108
SeqIdPtr seq_id = NULL;
112
while(seq_id != NULL)
114
if (seq_id->choice == 1)
116
oip = seq_id->data.ptrvalue;
119
seq_id = seq_id->next;
124
/*---------------- remove redundant sequences ----------------*/
125
/* ExtractThisSep: build a new SeqEntry (choice 1) that refers to the single
 * Bioseq of sepThis whose chain character equals Chain.
 *
 *   sepThis  entry to inspect; its choice selects the path taken.
 *   Chain    chain character to look for.
 *
 * Visible behaviour:
 *   - a fresh entry is allocated with SeqEntryNew() and given choice 1;
 *   - on the first path (its condition is not visible in this excerpt) the
 *     new entry simply shares sepThis->data.ptrvalue;
 *   - when choice is 2, data.ptrvalue is treated as a BioseqSet and its
 *     seq_set list is walked; for each Bioseq a chain character is derived
 *     from the first id (PDB id: its chain field, or ' ' when that field is
 *     zero; local id: character index 5 of the id string) and, on a match
 *     with Chain, the new entry is pointed at that Bioseq.
 * The new entry shares the Bioseq pointer; nothing is copied.
 *
 * NOTE(review): the return statement is not visible; presumably sepThisNew
 * is returned -- confirm.
 * NOTE(review): the results of GetPdbSeqId() and GetLocalId() are
 * dereferenced without a NULL check in the visible lines.
 * NOTE(review): pdb_seq_id->chain is cast to Char, so it appears to be an
 * integral field; comparing it with NULL is then just a test for 0. */
SeqEntryPtr ExtractThisSep(SeqEntryPtr sepThis, Char Chain)
127
SeqEntryPtr sepThisNew = NULL;
128
BioseqPtr bsp = NULL;
129
BioseqSetPtr bssp = NULL;
130
PDBSeqIdPtr pdb_seq_id;
131
ObjectIdPtr object_id;
136
sepThisNew = SeqEntryNew();
137
sepThisNew->choice = 1;
139
choice = sepThis->choice;
141
sepThisNew->data.ptrvalue = sepThis->data.ptrvalue;
144
else if(choice == 2){
145
bssp = sepThis->data.ptrvalue;
146
sepThis = bssp->seq_set;
148
while(sepThis != NULL){
149
bsp = sepThis->data.ptrvalue;
150
if (bsp->id->choice == 15)
152
pdb_seq_id = GetPdbSeqId(bsp->id);
153
if(pdb_seq_id->chain !=NULL) ThisChain = (Char) pdb_seq_id->chain;
154
else ThisChain = ' ';
156
if (bsp->id->choice == 1)
158
object_id = GetLocalId(bsp->id);
159
if (object_id->str) ThisChain = object_id->str[5]; /* index 5 is where CheckId() places the chain character when it builds a local id string */
162
if(ThisChain == Chain) {
163
sepThisNew->data.ptrvalue = bsp;
167
sepThis = sepThis->next;
172
/*---------------- make a new SeqEntry ------------------------*/
173
/* ScreenSequence: rebuild the sequences list of the alignment object so that
 * each entry refers only to the chain named by the corresponding feature.
 *
 *   pvnNcbi  mime object whose data.ptrvalue is used as a BiostrucAlignPtr.
 *   JobID    not referenced in the lines visible in this excerpt.
 *
 * Visible behaviour:
 *   - the first 6 characters of the first feature name are copied into the
 *     file-scope pSegmentMaster; character index 4 is used as the master
 *     chain;
 *   - the first sequence entry is reduced with ExtractThisSep() and linked
 *     into a new list (sepHeadNew);
 *   - for the slave side, the text starting at index 7 of the feature name
 *     is duplicated and cut to 6 characters, its character index 4 is used
 *     as the chain, and the matching entry is appended to the new list
 *     while the feature and sequence cursors advance;
 *   - finally pbsaStruct->sequences is replaced by the new list.
 *
 * NOTE(review): loop and branch headers and the return statement are not
 * visible here; presumably pvnNcbi is returned, since CheckId() assigns the
 * result back to its own pvnNcbi -- confirm.
 * NOTE(review): sepThis receives SeqEntryNew() and is then assigned the
 * result of ExtractThisSep(); if both run on the same path the first
 * allocation is lost -- check the branch structure in the full file.
 * NOTE(review): pSegmentSlave is produced by StringSave() per feature but
 * the only visible MemFree() comes after the list is installed; if the
 * StringSave() sits inside a loop, earlier copies are never released.
 * NOTE(review): the previous sequences list is replaced without any
 * visible release of its nodes. */
NcbiMimeAsn1Ptr ScreenSequence( NcbiMimeAsn1Ptr pvnNcbi, CharPtr JobID)
175
BiostrucAlignPtr pbsaStruct = NULL;
176
BiostrucAnnotSetPtr pbsaThis = NULL, pbsaHead = NULL;
177
BiostrucFeatureSetPtr pbsfsThis = NULL;
178
BiostrucFeaturePtr pbsfThis = NULL;
179
SeqEntryPtr sepHead = NULL, sepHeadNew = NULL, sepThis = NULL;
181
/*Char PDBName[Local_max], */
183
CharPtr pSegmentSlave;
185
pbsaStruct = pvnNcbi->data.ptrvalue;
187
pbsaThis = pbsaStruct->alignments;
188
pbsfsThis = pbsaThis->features;
189
pbsfThis = pbsfsThis->features;
191
StringNCpy(pSegmentMaster, pbsfThis->name, 6);
192
/* StringNCpy(PDBName, pSegmentMaster, 4); */
193
Chain = pSegmentMaster[4];
195
sepHead = pbsaStruct->sequences;
197
sepThis = SeqEntryNew();
199
sepThis->data.ptrvalue = sepHead->data.ptrvalue;
202
sepThis = ExtractThisSep(sepHead, Chain);
203
ValNodeLink(&sepHeadNew, sepThis);
205
sepHead = sepHead->next;
207
pSegmentSlave = StringSave(&pbsfThis->name[7]);
208
pSegmentSlave[6]='\0';
210
/*StringNCpy(PDBName, pSegmentSlave, 4);*/
211
Chain = pSegmentSlave[4];
213
sepThis = ExtractThisSep(sepHead, Chain);
214
ValNodeLink(&sepHeadNew, sepThis);
216
pbsfThis = pbsfThis->next;
217
sepHead = sepHead->next;
220
pbsaStruct->sequences = sepHeadNew;
221
MemFree(pSegmentSlave);
225
/*---------------- replace original seq_id by local_id --------------*/
226
/* AssignLocaIdToBiostruc: attach sip as the seq_id of every molecule graph
 * of bsp whose name starts with the character Chain.
 *
 *   sip      SeqId to install; the pointer itself is stored, not a copy.
 *   PDBName  only referenced by a commented-out sanity check in the visible
 *            lines.
 *   Chain    chain character to match.
 *   bsp      structure whose chemical_graph->molecule_graphs list is walked.
 *
 * Visible behaviour: ThisPDBName starts as PDBNAME_DEFAULT and, when a
 * BiostrucDescr_name descriptor exists, receives its first 4 characters.
 * For each molecule graph the BiomolDescr_name descriptor text (or
 * STRING_UNK_MOL on the else path) is duplicated into ThisChain and its
 * first character is compared with Chain.
 *
 * NOTE(review): ThisPDBName is filled but only used by the disabled check.
 * NOTE(review): ThisChain comes from StringSave() for every graph and no
 * matching MemFree() is visible in this excerpt -- possible leak; confirm.
 * NOTE(review): the condition guarding the pbmdrThis dereference is not
 * visible; the else branch suggests a NULL test exists -- confirm. */
void AssignLocaIdToBiostruc(SeqIdPtr sip, Char *PDBName, Char Chain,BiostrucPtr bsp)
228
BiostrucDescrPtr pbsdrThis = NULL;
229
MoleculeGraphPtr pmgThis = NULL;
230
BiomolDescrPtr pbmdrThis = NULL;
232
Char ThisPDBName[Local_max], *ThisChain;
234
StringCpy(ThisPDBName, PDBNAME_DEFAULT);
235
pbsdrThis = ValNodeFindNext(bsp->descr, NULL, BiostrucDescr_name);
236
if(pbsdrThis) StringNCpy(ThisPDBName, pbsdrThis ->data.ptrvalue, 4);
238
/*if(StringCmp(PDBName, ThisPDBName) != 0) {
239
printf("Something is wrong!\n");
243
pmgThis = bsp->chemical_graph->molecule_graphs;
246
/* loop over molecule_graphs to match chain */
247
pbmdrThis = ValNodeFindNext(pmgThis->descr, NULL, BiomolDescr_name);
249
ThisChain = StringSave((CharPtr)pbmdrThis->data.ptrvalue);
251
else ThisChain = StringSave(STRING_UNK_MOL);
252
if(Chain == *ThisChain) {
253
pmgThis->seq_id = sip;
257
pmgThis = pmgThis->next;
261
/***************************************************************************/
262
/* GetObjectId() Ken */
263
/***************************************************************************/
264
/* GetObjectId: walk the molecule graphs of bsp and stop at the first one
 * whose ObjectId string has, at index 5, the same character as index 4 of
 * pSegmentThis.
 *
 *   bsp           structure whose chemical_graph->molecule_graphs is walked.
 *   pSegmentThis  segment name; only character index 4 is read here.
 *
 * NOTE(review): the declarations of sip and oip, the statement that sets
 * sip for each graph (presumably from the graph's seq_id), and the return
 * statement are not visible in this excerpt; presumably oip is returned.
 * NOTE(review): if no graph matches, oip still holds the value from the
 * last graph visited (or is unset when the list is empty) -- confirm how
 * the full function handles that.
 * NOTE(review): oip->str is indexed without a visible NULL check. */
ObjectIdPtr GetObjectId(BiostrucPtr bsp, CharPtr pSegmentThis)
268
MoleculeGraphPtr mgp;
270
for (mgp = bsp->chemical_graph->molecule_graphs; mgp; mgp = mgp->next)
273
oip = (ObjectIdPtr)sip->data.ptrvalue;
274
if (oip->str[5] == pSegmentThis[4]) break;
279
/*--------------- check SeqId for each Bioseq -----------------*/
280
/* ReplaceBioseqId: when the PDB chain of bsp equals Chain, repoint the ids
 * found in the Bioseq's feature locations to sip.
 *
 *   sip      SeqId to install; the pointer is shared, not copied.
 *   PDBName  not referenced in the lines visible in this excerpt.
 *   Chain    chain character the Bioseq must carry.
 *   bsp      Bioseq to update.
 *
 * Visible behaviour: the chain character is taken from the PDB id (' ' when
 * the chain field is zero). On a match, TRUE is returned immediately if
 * there is no annotation; for an annotation of type 1 the feature locations
 * are handled by kind:
 *   SEQLOC_INT   the interval's id is involved (old id kept in sip_temp);
 *   SEQLOC_BOND  the id of each non-NULL point is set to sip;
 *   SEQLOC_MIX   each member is handled as an interval or a bond as above.
 *
 * NOTE(review): other return paths, the loops over annotations/features and
 * the statements assigning sap, sfp, slp and spp are not visible here; the
 * repeated "if(spp) spp->id = sip;" lines presumably follow assignments of
 * the bond's two points -- confirm.
 * NOTE(review): pdb_seq_id is dereferenced without a NULL check, unlike in
 * AssignLocaIdToSeqAlign().
 * NOTE(review): overwritten ids are not freed; the existing comment below
 * explains that freeing caused problems. */
Boolean ReplaceBioseqId(SeqIdPtr sip, Char *PDBName, Char Chain, BioseqPtr bsp)
282
PDBSeqIdPtr pdb_seq_id;
285
SeqAnnotPtr sap = NULL;
286
SeqFeatPtr sfp = NULL;
287
SeqLocPtr slp = NULL;
288
SeqIntPtr sintp = NULL;
289
SeqIdPtr sip_temp = NULL;
291
SeqLocPtr slp_mix = NULL;
292
SeqPntPtr spp = NULL;
293
SeqBondPtr sbp = NULL;
295
pdb_seq_id = GetPdbSeqId(bsp->id);
296
/* may need to be modified depending on how the bioseq id is represented */
297
/* in a bioseq fetched from Entrez a PDBSeqId exists, but it is unclear */
298
/* how struc2seq behaves */
299
if(pdb_seq_id->chain !=NULL) ThisChain = (Char) pdb_seq_id->chain;
300
else ThisChain = ' ';
302
if(ThisChain == Chain) {
306
if(sap == NULL) return TRUE;
307
else if(sap->type == 1){
312
if(slp->choice == SEQLOC_INT) {
313
sintp = slp->data.ptrvalue;
315
sip_temp = sintp->id;
317
/* sip_temp = SeqIdFree(sip_temp); */
318
/* freeing causes a problem when the slave is a duplicate of the master; */
319
/* this may be related to how the sequence is constructed */
323
else if(slp->choice == SEQLOC_BOND){
324
sbp = slp->data.ptrvalue;
326
if(spp) spp->id = sip;
328
if(spp) spp->id = sip;
330
else if(slp->choice == SEQLOC_MIX){
331
slp_mix = slp->data.ptrvalue;
333
if(slp_mix->choice == SEQLOC_INT){
334
sintp = slp_mix->data.ptrvalue;
335
if(sintp) sintp->id = sip;
337
else if(slp_mix->choice == SEQLOC_BOND){
338
sbp = slp_mix->data.ptrvalue;
340
if(spp) spp->id = sip;
342
if(spp) spp->id = sip;
344
slp_mix = slp_mix->next;
360
/*---------------- replace original seq_id by local_id --------------*/
361
/* AssignLocaIdToBioseq: apply ReplaceBioseqId() to the Bioseq(s) reachable
 * from sep.
 *
 *   sip, PDBName, Chain  forwarded unchanged to ReplaceBioseqId().
 *   sep                  entry to process; its choice selects the path.
 *
 * Visible behaviour: on the first path (condition not visible in this
 * excerpt) data.ptrvalue is used directly as the Bioseq and the Boolean
 * result of ReplaceBioseqId() is ignored. When choice is 2, data.ptrvalue is
 * taken as a BioseqSet, member entries are visited, and the function
 * returns as soon as one call reports that an id was replaced.
 *
 * NOTE(review): the declarations of choice, bsp and bssp, the loop over the
 * set members and the statement re-pointing sep at them are not visible
 * here -- confirm against the full file. */
void AssignLocaIdToBioseq(SeqIdPtr sip, Char *PDBName, Char Chain, SeqEntryPtr sep)
367
Boolean IdReplaced = FALSE;
369
choice = sep->choice;
371
bsp = sep->data.ptrvalue;
372
ReplaceBioseqId(sip, PDBName, Chain, bsp);
374
else if(choice == 2){
375
bssp = sep->data.ptrvalue;
379
bsp = sep->data.ptrvalue;
380
IdReplaced = ReplaceBioseqId(sip, PDBName, Chain, bsp);
381
if(IdReplaced) return;
387
/*---------------- replace original seq_id by local_id --------------*/
388
/* AssignLocaIdToSeqAlign: install sip as the second (slave) id of the
 * dense-diag segments of salpHead when the slave's PDB chain equals Chain.
 *
 *   sip       SeqId to install; the pointer is stored in ddp->id->next as
 *             is, not copied.
 *   PDBName   not referenced in the lines visible in this excerpt.
 *   Chain     chain character the slave id must carry.
 *   salpHead  alignment whose segs field is used as a DenseDiagPtr.
 *
 * Visible behaviour: the id cursor is advanced by one node to reach the
 * slave id, GetPdbSeqId() is applied to it, and only when a PDB id is found
 * is the chain compared (' ' stands in for a zero chain field).
 *
 * NOTE(review): the statement that points sipThis at ddp->id and any loop
 * over further segments are not visible here -- confirm.
 * NOTE(review): the id previously held in ddp->id->next is overwritten
 * without a visible release. */
void AssignLocaIdToSeqAlign(SeqIdPtr sip, Char *PDBName, Char Chain, SeqAlignPtr salpHead)
391
DenseDiagPtr ddp = NULL;
392
SeqIdPtr sipThis = NULL;
393
PDBSeqIdPtr pdb_seq_id;
396
ddp = salpHead->segs;
400
sipThis = sipThis->next;
401
/* get to the slave SeqId */
403
pdb_seq_id = GetPdbSeqId(sipThis);
404
if(pdb_seq_id != NULL){
405
if(pdb_seq_id->chain !=NULL) ThisChain = (Char) pdb_seq_id->chain;
406
else ThisChain = ' ';
407
if(ThisChain == Chain) ddp->id->next = sip;
408
/* matching the chain here may not actually be necessary */
416
/*---------------- To put Local_id in segs for Vastsearch - Ken -----------*/
417
/* AssignLocaIdToSeqAlignMaster: replace the first (master) id of the
 * dense-diag segments of salpHead with a copy of sip.
 *
 *   sip       SeqId to copy; duplicated with AsnIoMemCopy(), so the caller
 *             keeps ownership of the original.
 *   salpHead  alignment whose segs field is used as a DenseDiagPtr.
 *
 * Visible behaviour: the link to the second id is saved, ddp->id is
 * replaced by the copy, and the saved link is re-attached behind it. An
 * error is posted with ErrPostEx() on the failure path.
 *
 * NOTE(review): the condition selecting between the replacement and the
 * error message, and any loop over further segments, are not visible in
 * this excerpt -- confirm.
 * NOTE(review): the id previously held in ddp->id is not visibly released.
 * NOTE(review): the result of AsnIoMemCopy() is dereferenced without a
 * visible NULL check. */
void AssignLocaIdToSeqAlignMaster(SeqIdPtr sip, SeqAlignPtr salpHead)
421
SeqIdPtr sipTemp = NULL;
423
ddp = salpHead->segs;
428
sipTemp = ddp->id->next;
430
ddp->id = AsnIoMemCopy(sip, (AsnReadFunc) SeqIdAsnRead, (AsnWriteFunc) SeqIdAsnWrite);
431
ddp->id->next = sipTemp;
433
ErrPostEx(SEV_ERROR,0,0,"AssignLocalIdToSeqAlignMaster: No SeqId on SeqALign\n");
438
/*---------------- check whether an identical chain exists -------------*/
439
/* FindIdSlave: test whether the segment named by pSegmentThis duplicates the
 * master segment or one of the slave segments listed before position
 * iCount1.
 *
 *   pSegmentThis  segment name; only its first 5 characters are compared.
 *   pbsfHead      head of the feature list; the slave name is read at
 *                 offset 7 of each feature's name.
 *   iCount1       index of the current feature; features at or beyond this
 *                 index are not examined.
 *
 * Visible behaviour: the name is first compared with the file-scope
 * pSegmentMaster; otherwise the feature list is walked and FALSE is
 * returned once the walk counter reaches iCount1.
 *
 * NOTE(review): the declaration and increment of iCount2, the loop header
 * and the return values of the matching branches and of the iCount1 == 0
 * branch are not visible in this excerpt; presumably a match yields TRUE
 * and the first slave with no master match yields FALSE -- confirm. */
Boolean FindIdSlave(Char *pSegmentThis, BiostrucFeaturePtr pbsfHead, Int4 iCount1)
443
if(StringNCmp(pSegmentThis, pSegmentMaster, 5) == 0){
444
/* only compare PDB code and Chain Id */
447
else if(iCount1 == 0) {
453
if(iCount2 >= iCount1) return FALSE;
454
if(StringNCmp(pSegmentThis, pbsfHead->name + 7, 5) == 0) {
455
/* only compare PDB code and Chain Id */
460
pbsfHead = pbsfHead->next;
466
/*---------------- make new LocalId ------------------------*/
467
/* MakeLocalId: create a SeqId node of choice SEQID_LOCAL that wraps an
 * ObjectId.
 *
 *   str  text for the local id.
 *
 * Visible behaviour: a node is allocated with ValNodeNew(NULL), tagged
 * SEQID_LOCAL and pointed at oip.
 *
 * NOTE(review): the declarations, the construction of oip from str and the
 * return statement are not visible in this excerpt; presumably oip carries
 * a copy of str and sip is returned to the caller -- confirm.
 * NOTE(review): the result of ValNodeNew() is used without a visible NULL
 * check. */
SeqIdPtr MakeLocalId(Char *str)
475
sip = ValNodeNew(NULL);
476
sip->choice = SEQID_LOCAL;
477
sip->data.ptrvalue = oip;
481
/*---------------- to assign local id to identical chain ---------------*/
482
NcbiMimeAsn1Ptr CheckId(NcbiMimeAsn1Ptr pvnNcbi, CharPtr JobID)
484
/* to assign local id to identical chain so that to work around */
485
/* object manager for messaging */ /* yanli */
487
BiostrucAlignPtr pbsaStruct = NULL;
488
BiostrucPtr pbsMaster = NULL, pbsSlaveHead = NULL;
489
BiostrucAnnotSetPtr pbsaThis = NULL, pbsaHead = NULL;
490
SeqAnnotPtr psaAlignHead = NULL;
493
BiostrucFeatureSetPtr pbsfsThis = NULL, pbsfsHead = NULL;
494
BiostrucFeaturePtr pbsfThis = NULL, pbsfHead = NULL;
495
SeqAlignPtr salpHead;
497
SeqIdPtr sip, sip_cpy, sipMaster, sip_temp;
498
ObjectIdPtr oipMaster, oip;
500
Int4 iCount1 = 0, iCount_ID = 0;
501
Boolean IdChainFound = FALSE;
503
CharPtr pSegmentSlave;
504
Char PDBName[Local_max], Chain, Domain;
506
Char AlignIdStr[Local_max];
509
typedef struct local_string{
511
}Local_String, PNTR Local_StringPtr;
515
pvnNcbi = ScreenSequence(pvnNcbi, JobID);
517
pbsaStruct = pvnNcbi->data.ptrvalue;
519
pbsaThis = pbsaStruct->alignments;
520
pbsSlaveHead = pbsaStruct->slaves;
521
sepHead = pbsaStruct->sequences;
522
psaAlignHead = pbsaStruct->seqalign;
524
pbsfsThis = pbsaThis->features;
525
pbsfThis = pbsfsThis->features;
526
/* from vastsrv page, we are always considering one */
527
/* domain of the master, so no loop for pbsfsThis */
529
StringNCpy(pSegmentMaster, pbsfThis->name, 6);
530
pSegmentMaster[6]='\0';
531
/*pSegmentMaster[0]=pbsfThis->name[0];
532
pSegmentMaster[1]=pbsfThis->name[1];
533
pSegmentMaster[2]=pbsfThis->name[2];
534
pSegmentMaster[3]=pbsfThis->name[3];
535
pSegmentMaster[4]=pbsfThis->name[4];
536
pSegmentMaster[5]=pbsfThis->name[5];
537
pSegmentMaster[6]='\0'; */
539
/* see if Master Sequence was generated by VastSearch - Ken*/
542
for (salpHead = psaAlignHead->data; salpHead; salpHead = salpHead->next)
544
oipMaster = ObjectIdNew();
545
oipMaster = GetObjectId(pbsaStruct->master, pSegmentMaster);
547
sipMaster = ValNodeNew(NULL);
548
sipMaster->choice = SEQID_LOCAL;
549
sipMaster->data.ptrvalue = oipMaster;
550
AssignLocaIdToSeqAlignMaster(sipMaster, salpHead);
554
salpHead = psaAlignHead->data;
555
/* from SeqAnnot to SeqAlign */
556
sepHead = sepHead->next;
557
/* go to the slave sequences */
561
pSegmentSlave = StringSave(&pbsfThis->name[7]);
562
pSegmentSlave[6]='\0';
564
IdChainFound = FindIdSlave(pSegmentSlave, pbsaThis->features->features, iCount1);
565
if(IdChainFound) iCount_ID++;
567
MemFree(pSegmentSlave);
568
pbsfThis = pbsfThis->next;
571
str = (Pointer) MemNew((size_t) ((iCount_ID + 1) * sizeof(Pointer)));
572
for(iCount1 = 0; iCount1 < iCount_ID; iCount1++){
573
str[iCount1] = (Local_StringPtr) MemNew(sizeof(Local_String));
576
pbsfThis = pbsfsThis->features;
581
pSegmentSlave = StringSave(&pbsfThis->name[7]);
582
pSegmentSlave[6]='\0';
584
/* pSegmentSlave[0]=pbsfThis->name[7];
585
pSegmentSlave[1]=pbsfThis->name[8];
586
pSegmentSlave[2]=pbsfThis->name[9];
587
pSegmentSlave[3]=pbsfThis->name[10];
588
pSegmentSlave[4]=pbsfThis->name[11];
589
pSegmentSlave[5]=pbsfThis->name[12];
590
pSegmentSlave[6]='\0'; */
592
IdChainFound = FindIdSlave(pSegmentSlave, pbsaThis->features->features, iCount1);
594
StringNCpy(str[iCount_ID]->str, pSegmentSlave, 4);
595
str[iCount_ID]->str[4] = ' '; str[iCount_ID]->str[5] = pSegmentSlave[4];
596
str[iCount_ID]->str[6] = ' '; str[iCount_ID]->str[7] = pSegmentSlave[5];
597
str[iCount_ID]->str[8] = '\0';
599
AlignId = (Int2) (pbsfThis->id % 10) ; /* clique number */
600
sprintf(AlignIdStr, "%d", AlignId);
601
str[iCount_ID]->str[8] = ' '; str[iCount_ID]->str[9] = '\0';
602
StringCat(str[iCount_ID]->str, AlignIdStr);
604
sip_cpy = MakeLocalId(str[iCount_ID]->str);
607
/* oip = ObjectIdNew();
610
sip = ValNodeNew(NULL);
611
sip->choice = SEQID_LOCAL;
612
sip->data.ptrvalue = oip;
614
sip_cpy = AsnIoMemCopy(sip, (AsnReadFunc) SeqIdAsnRead, (AsnWriteFunc) SeqIdAsnWrite); */
616
StringNCpy(PDBName, pSegmentSlave, 4);
617
Chain = pSegmentSlave[4];
619
AssignLocaIdToBiostruc(sip_cpy, PDBName, Chain, pbsSlaveHead);
620
AssignLocaIdToBioseq(sip_cpy, PDBName, Chain, sepHead);
621
AssignLocaIdToSeqAlign(sip_cpy, PDBName, Chain, salpHead);
623
MemFree(pSegmentSlave);
624
pbsfThis = pbsfThis->next;
626
if(pbsSlaveHead) pbsSlaveHead = pbsSlaveHead->next;
627
if(sepHead) sepHead = sepHead->next;
628
if(salpHead) salpHead = salpHead->next;