3
* ===========================================================================
6
* National Center for Biotechnology Information (NCBI)
8
* This software/database is a "United States Government Work" under the
9
* terms of the United States Copyright Act. It was written as part of
10
* the author's official duties as a United States Government employee and
11
* thus cannot be copyrighted. This software/database is freely available
12
* to the public for use. The National Library of Medicine and the U.S.
13
* Government do not place any restriction on its use or reproduction.
14
* We would, however, appreciate having the NCBI and the author cited in
15
* any work or product based on this material
17
* Although all reasonable efforts have been taken to ensure the accuracy
18
* and reliability of the software and data, the NLM and the U.S.
19
* Government do not and cannot warrant the performance or results that
20
* may be obtained by using this software or data. The NLM and the U.S.
21
* Government disclaim all warranties, express or implied, including
22
* warranties of performance, merchantability or fitness for any particular
25
* ===========================================================================
27
* File Name: mkbioseq_vs.c
31
* $Log: mkbioseq_vs.c,v $
32
* Revision 1.1.1.1 2002/12/06 20:17:21 chenj
35
* Revision 6.2 1999/06/15 18:12:53 addess
36
* fixed some lines related to BioseqPtr
38
* Revision 6.2 1999/06/15 18:12:53 addess
39
* fixed some lines related to BioseqPtr
41
* Revision 6.1 1998/07/17 18:59:57 madej
42
* Created by Ken Addess.
46
/************************************************************/
50
/* Creates a Bioseq/BioseqSet object from a Biostruc */
51
/* object and writes it out to a file. */
53
/************************************************************/
58
static Int4 NumberOfBioChains(MoleculeGraphPtr mgp)
65
vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
67
if (vnp) mtype = vnp->data.intvalue;
84
static MoleculeGraphPtr MakeBioGraphPtr(MoleculeGraphPtr mgp)
86
MoleculeGraphPtr newbp, bp = NULL, currentbp;
92
vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
94
if (vnp) mtype = vnp->data.intvalue;
101
newbp = MoleculeGraphNew();
103
newbp->descr = mgp->descr;
104
newbp->seq_id = mgp->seq_id;
105
newbp->residue_sequence = mgp->residue_sequence;
106
newbp->inter_residue_bonds = mgp->inter_residue_bonds;
113
currentbp->next = newbp;
125
static Int4 NumberOfHetChains(MoleculeGraphPtr mgp, MoleculeGraphPtr bp)
128
Int4 mtype, molecule_id, nhet = 0;
133
vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
135
if (vnp) mtype = vnp->data.intvalue;
140
if (vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_name))
141
mname = vnp->data.ptrvalue;
142
molecule_id = atoi(mname);
144
if (isBiopoly(molecule_id, bp))
155
static MoleculeGraphPtr MakeHetGraphPtr(MoleculeGraphPtr mgp, MoleculeGraphPtr bp)
157
MoleculeGraphPtr newhet, het = NULL, currenthet;
159
Int4 mtype, molecule_id;
164
vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
166
if (vnp) mtype = vnp->data.intvalue;
171
vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_name);
173
if (vnp) mname = vnp->data.ptrvalue;
175
molecule_id = atoi(mname);
176
if (isBiopoly(molecule_id, bp))
178
newhet = MoleculeGraphNew();
179
newhet->id = mgp->id;
180
newhet->descr = mgp->descr;
181
newhet->seq_id = mgp->seq_id;
182
newhet->residue_sequence = mgp->residue_sequence;
183
newhet->inter_residue_bonds = mgp->inter_residue_bonds;
190
currenthet->next = newhet;
204
SeqEntryPtr LIBCALL MakeBioseqs(BiostrucPtr bsp, BiostrucResidueGraphSetPtr stdDictionary)
206
ValNodePtr vnp, seq_set, hetval, pvnThePoints;
207
BiostrucHistoryPtr bhp;
208
BiostrucSourcePtr bssp;
209
BiostrucGraphPtr bsgp;
210
BiostrucModelPtr bsmp;
211
BiostrucFeatureSetPtr bsfsp;
212
BiostrucFeaturePtr bsfp;
213
ChemGraphPntrsPtr cgpp;
217
ResidueIntervalPntrPtr ripp;
218
ResidueExplicitPntrsPtr rpp1=NULL, rpp2=NULL;
219
MoleculeGraphPtr bp, het, currenthet, currentbp, mgp;
220
InterResidueBondPtr currentabp, abp;
222
SeqEntryPtr pdb_entry;
224
BioseqPtr bioseqs[MAXNUM], current_bioseq;
225
Int4 DomainNum, molId1, resId1, atmId1, molId2, resId2, atmId2;
226
Int4 nbp, nhet, num_chain, index = 0, chnidx, bpchnidx, bpresidx, hetidx, rescount, bioseq_idx;
227
Int4 ssresidx1, ssresidx2, ssmolidx1, ssmolidx2;
228
CharPtr feature_name, rname;
229
Boolean interchain, bonds, found1, found2;
230
SeqAnnotPtr sap = NULL;
238
vnp = ValNodeFindNext(bsp->descr, NULL, BiostrucDescr_history);
242
bhp = (BiostrucHistoryPtr) vnp->data.ptrvalue;
243
bssp = bhp->data_source;
246
bsgp = bsp->chemical_graph;
249
nbp = NumberOfBioChains(bsgp->molecule_graphs);
251
bp = MakeBioGraphPtr(bsgp->molecule_graphs);
253
nhet = NumberOfHetChains(bsgp->molecule_graphs, bp);
255
het = MakeHetGraphPtr(bsgp->molecule_graphs, bp);
257
pdb_entry = CreateSeqEntry(bssp, bsgp, bsmp, bsp->descr, nbp);
259
if (IS_Bioseq(pdb_entry))
261
vnp = ValNodeFindNext(pdb_entry, NULL, 1);
262
bioseqs[index] = (BioseqPtr) vnp->data.ptrvalue;
266
vnp = ValNodeFindNext(pdb_entry, NULL, 2);
267
biossp = (BioseqSetPtr) vnp->data.ptrvalue;
268
seq_set = biossp->seq_set;
270
for (num_chain = 0; num_chain < nbp, seq_set; seq_set = seq_set->next, num_chain++, index++)
271
bioseqs[index] = (BioseqPtr) seq_set->data.ptrvalue;
274
dtp = (DbtagPtr)bssp->database_entry_id->data.ptrvalue;
276
for (index = 0, currentbp = bp; index < nbp, currentbp != NULL; currentbp = currentbp->next, index++)
278
current_bioseq = bioseqs[index];
279
if (currentbp->seq_id->choice == '\f')
281
current_bioseq->id = MakePDBId(bssp, currentbp, dtp);
282
sip = ValNodeNew(NULL);
283
sip->choice = SEQID_GI;
284
sip->data.intvalue = currentbp->seq_id->data.intvalue;
285
current_bioseq->id->next = sip;
287
else if (currentbp->seq_id->choice == SEQID_LOCAL)
288
current_bioseq->id = MakeLocalID(-99999, currentbp, dtp);
289
current_bioseq->descr = MakeBioseqDescr(currentbp, current_bioseq->descr);
290
current_bioseq->mol = MakeBioseqMol(currentbp);
291
current_bioseq->length = CountNumOfResidues(currentbp);
293
if (current_bioseq->mol == Seq_mol_aa)
294
current_bioseq->seq_data_type = Seq_code_iupacaa;
296
current_bioseq->seq_data_type = Seq_code_iupacna;
298
current_bioseq->seq_data = AddSeqData(currentbp, current_bioseq->mol, current_bioseq->length, bsgp, stdDictionary);
299
current_bioseq->annot = AddNstdSeqAnnot(currentbp, current_bioseq->id, bsgp);
301
/* Add information about Secondary Structure and Domains */
302
for (bsfsp = bsp->features, DomainNum = 0; bsfsp; bsfsp = bsfsp->next)
304
if (vnp = ValNodeFindNext(bsfsp->descr, NULL, BiostrucFeatureSetDescr_name))
305
feature_name = vnp->data.ptrvalue;
307
if ((!StringICmp("NCBI assigned secondary structure", feature_name)) ||
308
(!StringICmp("NCBI Domains", feature_name)))
310
for (bsfp = bsfsp->features; bsfp; bsfp = bsfp->next)
312
cgpp = (ChemGraphPntrsPtr)bsfp->Location_location->data.ptrvalue;
313
rpp = (ResiduePntrsPtr)cgpp->data.ptrvalue;
314
ripp = (ResidueIntervalPntrPtr)rpp->data.ptrvalue;
315
chnidx = findChnidx(ripp->molecule_id, nbp, bp);
316
current_bioseq = bioseqs[chnidx-1];
318
if (!StringICmp("NCBI Domains", feature_name)) DomainNum++;
320
if (current_bioseq->annot)
321
current_bioseq->annot = AddSecDomToSeqAnnot(bsfp, feature_name, current_bioseq->annot, current_bioseq->id, DomainNum);
323
current_bioseq->annot = AddSecDomToSeqAnnot(bsfp, feature_name, NULL, current_bioseq->id, DomainNum);
327
for (index = 0, currenthet = het; index < nhet, currenthet; currenthet = currenthet->next, index++)
329
hetval = MakeHetValNode(currenthet, stdDictionary, bsgp->residue_graphs);
333
for (abp = bsgp->inter_molecule_bonds, bonds = FALSE, rescount = 0; abp; abp = abp->next)
335
molId1 = abp->atom_id_1->molecule_id;
336
molId2 = abp->atom_id_2->molecule_id;
337
resId1 = abp->atom_id_1->residue_id;
338
resId2 = abp->atom_id_2->residue_id;
339
atmId1 = abp->atom_id_1->atom_id;
340
atmId2 = abp->atom_id_2->atom_id;
342
if (isBiopoly(molId1, bp) && isHet(molId2, het))
344
bpchnidx = molId1 - 1;
345
bpresidx = resId1 - 1;
346
hetidx = getHetIdx(molId2, het);
349
else if (isBiopoly(molId2, bp) && isHet(molId1, het))
351
bpchnidx = molId2 - 1;
352
bpresidx = resId2 - 1;
353
hetidx = getHetIdx(molId1, het);
361
if (!rescount) pvnThePoints = NULL;
362
ValNodeAddInt(&pvnThePoints, 0, bpresidx);
366
if (bioseq_idx != bpchnidx) interchain = TRUE;
368
bioseq_idx = bpchnidx;
377
current_bioseq = bioseqs[bioseq_idx];
378
if (current_bioseq->annot)
379
current_bioseq->annot = AddHetToSeqAnnot(current_bioseq->annot, current_bioseq->id, hetval, pvnThePoints, rescount);
381
current_bioseq->annot = AddHetToSeqAnnot(NULL, current_bioseq->id, hetval, pvnThePoints, rescount);
385
if (IS_Bioseq(pdb_entry))
386
sap = ((BioseqPtr)(pdb_entry->data.ptrvalue))->annot;
388
sap = ((BioseqSetPtr)(pdb_entry->data.ptrvalue))->annot;
393
if (IS_Bioseq(pdb_entry))
394
((BioseqPtr)(pdb_entry->data.ptrvalue))->annot = sap;
396
((BioseqSetPtr)(pdb_entry->data.ptrvalue))->annot = sap;
398
sap = AddHetToSeqAnnot(sap, bioseqs[bioseq_idx]->id, hetval, pvnThePoints, rescount);
403
current_bioseq = bioseqs[bioseq_idx];
404
vnp = current_bioseq->descr;
407
while (vnp->next != NULL) vnp = vnp->next;
410
else current_bioseq->descr = hetval;
414
mgp = bsgp->molecule_graphs;
415
abp = bsgp->inter_molecule_bonds;
421
currentabp = mgp->inter_residue_bonds;
425
while (currentabp != NULL)
427
molId1 = currentabp->atom_id_1->molecule_id;
428
molId2 = currentabp->atom_id_2->molecule_id;
429
resId1 = currentabp->atom_id_1->residue_id;
430
resId2 = currentabp->atom_id_2->residue_id;
431
atmId1 = currentabp->atom_id_1->atom_id;
432
atmId2 = currentabp->atom_id_2->atom_id;
438
if ((getAtomElementIdx(molId1, resId1, atmId1, bsgp, stdDictionary)==16)
439
&& (getAtomElementIdx(molId2, resId2, atmId2, bsgp, stdDictionary)==16))
441
/* Found possible disulfide bonds. */
443
if (isBiopoly(molId1, bp) && isBiopoly(molId2, bp))
446
for (index=0; index<findChnidx(molId1, nbp, bp)-1; index++)
447
currentbp = currentbp->next;
449
rs = currentbp->residue_sequence;
453
if (rs->id == resId1)
455
rgp = getResGraph(rs->residue_graph, bsgp, stdDictionary);
462
if (vnp = ValNodeFindNext(rgp->descr, NULL, BiomolDescr_name))
463
rname = vnp->data.ptrvalue;
465
if (!StringICmp(rname, "CYS")) found1 = TRUE;
468
for (index = 0; index < findChnidx(molId2, nbp, bp)-1; index++)
469
currentbp = currentbp->next;
471
rs = currentbp->residue_sequence;
475
if (rs->id == resId2)
477
rgp = getResGraph(rs->residue_graph, bsgp, stdDictionary);
484
if (vnp = ValNodeFindNext(rgp->descr, NULL, BiomolDescr_name))
485
rname = vnp->data.ptrvalue;
487
if (!StringICmp(rname, "CYS")) found2 = TRUE;
489
if (found1 && found2)
491
ssresidx1 = resId1 - 1;
492
ssresidx2 = resId2 - 1;
493
ssmolidx1 = molId1 - 1;
494
ssmolidx2 = molId2 - 1;
495
chnidx = findChnidx(molId1, nbp, bp);
497
if (ssmolidx1 == ssmolidx2)
499
current_bioseq = bioseqs[chnidx - 1];
500
if (current_bioseq->annot)
501
current_bioseq->annot = AddDisulToSeqAnnot(current_bioseq->annot, ssresidx1, ssresidx2, current_bioseq->id, current_bioseq->id);
503
current_bioseq->annot = AddDisulToSeqAnnot(NULL, ssresidx1, ssresidx2, current_bioseq->id, current_bioseq->id);
507
if (IS_Bioseq(pdb_entry))
508
sap = ((BioseqPtr)(pdb_entry->data.ptrvalue))->annot;
510
sap = ((BioseqSetPtr)(pdb_entry->data.ptrvalue))->annot;
515
if (IS_Bioseq(pdb_entry))
516
((BioseqPtr)(pdb_entry->data.ptrvalue))->annot = sap;
518
((BioseqSetPtr)(pdb_entry->data.ptrvalue))->annot = sap;
520
sap = AddDisulToSeqAnnot(sap, ssresidx1, ssresidx2, bioseqs[ssmolidx1]->id, bioseqs[ssmolidx2]->id);
526
currentabp = currentabp->next;
529
if ((currentabp == NULL) && (mgp == NULL) && (abp == NULL)) break;
531
else if((currentabp == NULL) && (mgp == NULL) && (abp != NULL))