1
/*===========================================================================
4
* National Center for Biotechnology Information
6
* This software/database is a "United States Government Work" under the
7
* terms of the United States Copyright Act. It was written as part of
8
* the author's official duties as a United States Government employee and
9
* thus cannot be copyrighted. This software/database is freely available
10
* to the public for use. The National Library of Medicine and the U.S.
11
* Government have not placed any restriction on its use or reproduction.
13
* Although all reasonable efforts have been taken to ensure the accuracy
14
* and reliability of the software and data, the NLM and the U.S.
15
* Government do not and cannot warrant the performance or results that
16
* may be obtained by using this software or data. The NLM and the U.S.
17
* Government disclaim all warranties, express or implied, including
18
* warranties of performance, merchantability or fitness for any particular
21
* Please cite the author in any work or product based on this material.
23
* ===========================================================================
25
* File Name: mmdborgcpy.c
29
* Initial Version Creation Date: 04/19/1997
32
* Copy taxonomy assignment from structure bioseq to biostruc
36
* --------------------------------------------------------------------------
37
* $Log: mmdborgcpy.c,v $
38
* Revision 1.1 2002/12/17 15:13:43 ywang
39
* pdbeast related tools updated
42
* ==========================================================================
45
/*----------------- get org from Bioseq and put them in Biostruc -------------*/
60
/* Argument table, entries 0-8. Elsewhere in this file the entries are
 * accessed through their strvalue member:
 *   [0]/[1] text/binary Seq-entry input directory,
 *   [2]/[3] text/binary Biostruc input directory,
 *   [4]/[5] text/binary Biostruc output directory,
 *   [6] entry-code list directory, [7] list file name, [8] log file name.
 * NOTE(review): the switch characters 'h' and 'i' are each used by two
 * entries - confirm whether anything relies on them being unique.
 * NOTE(review): the default values are site-specific absolute paths. */
{"Text Input Seq-entry File Directory","/net/keystone/usr/people/ywang/work/taxonomy/RawBioseq/", NULL,NULL,TRUE,'h',ARG_STRING,0.0,0,NULL},
61
{"Binary Input Seq-entry File Directory","/net/clobber/usr/people8/bryant/MMDB.Bioseq/", NULL,NULL,TRUE,'i',ARG_STRING,0.0,0,NULL},
62
{"Text Input Biostruc File Directory","/net/keystone/usr/people/ywang/work/taxonomy/Biostr_text/", NULL,NULL,TRUE,'h',ARG_STRING,0.0,0,NULL},
63
{"Binary Input Biostruc File Directory","/net/clobber/usr/people8/bryant/MMDB/", NULL,NULL,TRUE,'i',ARG_STRING,0.0,0,NULL},
64
{"Text Output Biostruc File Directory","/net/keystone/usr/people/ywang/work/taxonomy/Bioseq_text/", NULL,NULL,TRUE,'n',ARG_STRING,0.0,0,NULL},
65
{"Binary Output Biostruc File Directory","/net/clobber/usr/people10/ywang/work/taxonomy/MMDB/", NULL,NULL,TRUE,'o',ARG_STRING,0.0,0,NULL},
66
{"Entry Code List File Directory","/net/keystone/usr/people/ywang/work/taxonomy/index/", NULL,NULL,TRUE,'b',ARG_STRING,0.0,0,NULL},
67
{"List File Name","all.OCT96", NULL,NULL,TRUE,'c',ARG_FILE_IN,0.0,0,NULL},
68
{"Log errors to file named:","log.pdbeast.April",NULL,NULL,TRUE,'l',ARG_FILE_OUT, 0.0,0,NULL} };
70
/* Descriptor choice values compared against descr->choice when walking a
 * molecule graph's descriptor list. */
/* Descriptor whose data pointer is read as the chain name. */
#define chain_choice_num 1
71
/* Descriptor whose data pointer is a BioSource. */
#define biosource_choice_num 6
72
/* Descriptor whose integer value is the molecule type. */
#define molecule_type_num 9
75
/* TextInSeq / TextInStr / TextOutStr are set from the -im.seq, -im.str and
 * -om options (TRUE for 'a' text, FALSE for 'b' binary). SeqOpen gates the
 * call to GoToBiostruc in the main loop. */
Boolean IsBioseq, IsBioseqSet, TextInSeq = FALSE, TextInStr = FALSE, TextOutStr = FALSE, SeqOpen;
76
/* nchain indexes chain[] and biosource_chain[]; num_assign is compared with
 * nchain to stop the molecule-graph walk. */
Int4 nchain, num_assign;
77
/* Chain identifiers collected from the Seq-entry, and the chain of the
 * molecule graph currently being processed. */
Char chain[100], current_chain;
78
/* BioSource found for each collected chain (NULL when none was found). */
BioSourcePtr biosource_chain[100];
80
/* Biostruc currently being processed. */
BiostrucPtr root_bstrucp;
81
/* ASN.1 streams: aip1 Seq-entry input, aip2 Biostruc input, aip3 Biostruc
 * output. */
AsnIoPtr aip1, aip2, aip3;
82
/*---------------- Initialize --------------------------*/
94
/*-------------------------------------------------*/
97
/* Usage text: one line per command-line option. Keep the option spellings
 * printed here in sync with the strings compared in the argument parser. */
printf("\t -aid.seq Text Input Seq-entry File Directory\n");
98
printf("\t -bid.seq Binary Input Seq-entry File Directory\n");
99
printf("\t -aid.str Text Input Biostruc File Directory\n");
100
printf("\t -bid.str Binary Input Biostruc File Directory\n");
101
printf("\t -aod Text Output Biostruc File Directory\n");
102
printf("\t -bod Binary Output Biostruc File Directory\n");
103
printf("\t -ListPath Entry Code List File Directory\n");
104
printf("\t -ListName Entry Code List File Name\n");
105
printf("\t -Logfile File Name For Log Errors\n");
106
printf("\t -im.seq Input Mode of Seq-entry File: 'a' for text / 'b' for binary\n");
107
printf("\t -im.str Input Mode of Biostruc File: 'a' for text / 'b' for binary\n");
108
printf("\t -om Output Mode of Biostruc File: 'a' for text / 'b' for binary\n");
113
/*--------------- Get Arguments ------------------*/
119
/* Command-line option handling. Every recognised option consumes the
 * argument that follows it:
 *   - directory / file options store a copy of the value in the matching
 *     myargs[] slot (0-8);
 *   - mode options take 'a' (text) or 'b' (binary) and set TextInSeq,
 *     TextInStr or TextOutStr.
 * NOTE(review): the declaration of my_argu is not visible here, so the "%s"
 * reads into it are left as they were - confirm its size and bound them.
 * NOTE(review): no visible check that a value actually follows an option. */
Char listname[120], Command[120], str[120];
129
if(StringCmp(*argv, "-aid.seq") == 0) {
130
sscanf(*++argv, "%s", my_argu);
131
myargs[0].strvalue = StringSave(my_argu);
133
else if (StringCmp(*argv, "-bid.seq") == 0) {
134
sscanf(*++argv, "%s", my_argu);
135
myargs[1].strvalue = StringSave(my_argu);
137
/* Part of the same else-if chain as the options above, so a value that was
 * just consumed is never re-tested as an option name. */
else if(StringCmp(*argv, "-aid.str") == 0) {
138
sscanf(*++argv, "%s", my_argu);
139
myargs[2].strvalue = StringSave(my_argu);
141
else if (StringCmp(*argv, "-bid.str") == 0) {
142
sscanf(*++argv, "%s", my_argu);
143
myargs[3].strvalue = StringSave(my_argu);
145
else if (StringCmp(*argv, "-aod") == 0) {
146
sscanf(*++argv, "%s", my_argu);
147
myargs[4].strvalue = StringSave(my_argu);
149
else if (StringCmp(*argv, "-bod") == 0) {
150
sscanf(*++argv, "%s", my_argu);
151
myargs[5].strvalue = StringSave(my_argu);
153
/* The usage text spells this option "-ListPath"; the older "-Listpath"
 * spelling is still accepted. */
else if (StringCmp(*argv, "-ListPath") == 0 || StringCmp(*argv, "-Listpath") == 0) {
154
sscanf(*++argv, "%s", my_argu);
155
myargs[6].strvalue = StringSave(my_argu);
157
else if (StringCmp(*argv, "-ListName") == 0) {
158
sscanf(*++argv, "%s", my_argu);
159
myargs[7].strvalue = StringSave(my_argu);
161
else if (StringCmp(*argv, "-Logfile") == 0) {
162
sscanf(*++argv, "%s", my_argu);
163
myargs[8].strvalue = StringSave(my_argu);
165
else if (StringCmp(*argv, "-im.seq") == 0) {
166
/* str holds 120 chars; the field width keeps the read inside it. */
sscanf(*++argv, "%119s", str);
167
if(StringCmp(str, "a") == 0) TextInSeq = TRUE;
168
else if(StringCmp(str, "b") == 0) TextInSeq = FALSE;
170
else if (StringCmp(*argv, "-im.str") == 0) {
171
sscanf(*++argv, "%119s", str);
172
if(StringCmp(str, "a") == 0) TextInStr = TRUE;
173
else if(StringCmp(str, "b") == 0) TextInStr = FALSE;
175
else if (StringCmp(*argv, "-om") == 0) {
176
sscanf(*++argv, "%119s", str);
177
if(StringCmp(str, "a") == 0) TextOutStr = TRUE;
178
else if(StringCmp(str, "b") == 0)TextOutStr = FALSE;
185
/*------------------------------------------------------*/
194
/*--------------- Get PDB Seq-id ----------------------------*/
195
/* Return the PDB Seq-id attached to my_bsp, or NULL when the id list has no
 * such entry.
 * Walks the list starting at my_bsp->id and takes the data pointer of an
 * entry whose choice is 15.
 * NOTE(review): 15 is presumably the PDB Seq-id choice value - confirm
 * against the Seq-id definitions and prefer the named constant. */
PDBSeqIdPtr GetPdbSeqId(BioseqPtr my_bsp)
199
/* Start from NULL: the caller tests the result against NULL, so the
 * "not found" case must yield a defined value. */
PDBSeqIdPtr my_pdb_seq_id = NULL;
201
my_seq_id = my_bsp->id;
203
while(my_seq_id != NULL){
204
if(my_seq_id->choice == 15){
205
my_pdb_seq_id = my_seq_id->data.ptrvalue;
208
my_seq_id = my_seq_id->next;
211
return(my_pdb_seq_id);
214
/*------------------------------------------------------*/
215
/* Record the chain identifier and BioSource of one Bioseq in the global
 * chain[] / biosource_chain[] tables at index nchain.
 * The chain identifier comes from the Bioseq's PDB Seq-id (GetPdbSeqId); the
 * BioSource is the data pointer of a descriptor whose choice is 23, or NULL
 * when no such descriptor is found.
 * NOTE(review): 23 is presumably the BioSource descriptor choice - confirm.
 * NOTE(review): where nchain is advanced is not visible here, and no bound
 * check against the 100-entry tables is visible - confirm. */
AccessBioseq(BioseqPtr my_bsp)
217
BioSourcePtr my_bio_sourcep;
218
PDBSeqIdPtr my_pdb_seq_id;
223
my_pdb_seq_id = GetPdbSeqId(my_bsp);
225
if(my_pdb_seq_id !=NULL) {
226
/* The chain member is cast to Char when stored; a zero value is reported
 * as a missing identifier. */
if(my_pdb_seq_id->chain !=NULL) chain[nchain] = (Char) my_pdb_seq_id->chain;
227
else printf("No Identifier for This Chain!!!\n");
230
descr = my_bsp->descr;
232
/* Default to "no BioSource" before scanning the descriptors. */
biosource_chain[nchain] = NULL;
234
while(descr != NULL){
235
if(descr->choice == 23) {
236
biosource_chain[nchain] = descr->data.ptrvalue;
246
/*------------------------------------------------------*/
247
/* Dispatch on my_sep->choice.
 * In the first branch (its condition is not visible here) the entry's data
 * pointer is handed to AccessBioseq as a single Bioseq. For choice 2 the data
 * pointer is treated as a Bioseq-set and AccessBioseq is called on every
 * member of its seq_set list.
 * NOTE(review): set members are passed to AccessBioseq without testing their
 * own choice, so nested sets would be misread as Bioseqs - confirm that the
 * input never nests. */
AccessSeqEntry(SeqEntryPtr my_sep)
251
BioseqSetPtr my_bssp;
255
choice = my_sep->choice;
258
my_bsp = my_sep->data.ptrvalue;
259
AccessBioseq(my_bsp);
262
else if(choice == 2){
264
my_bssp = my_sep->data.ptrvalue;
265
my_sep = my_bssp->seq_set;
267
while(my_sep != NULL){
268
my_bsp = my_sep->data.ptrvalue;
269
AccessBioseq(my_bsp);
271
my_sep = my_sep->next;
276
/*------------------------------------------------------*/
277
/* Open and read the Seq-entry file for entry `code`, close the stream, and
 * pass the result to AccessSeqEntry.
 * The path is the directory from myargs[0] or myargs[1], followed by "a" or
 * "b", the entry code and ".ASN1". The file is opened with mode "r" or "rb".
 * NOTE(review): presumably TextInSeq selects between the two variants; the
 * selecting lines are not visible here - confirm.
 * NOTE(review): fname's declaration is not visible and the StringCpy /
 * StringCat calls do not bound the length - confirm the buffer is large
 * enough for directory + code + suffix. */
GoToSeqEntry(Char code[120])
285
StringCpy(fname, myargs[0].strvalue);
286
StringCat(fname, "a");
289
StringCpy(fname, myargs[1].strvalue);
290
StringCat(fname, "b");
293
StringCat(fname, code);
294
StringCat(fname, ".ASN1");
297
if(!(aip1 = AsnIoOpen(fname, "r"))) {
298
printf("Seq-Entry File %s can not be opened!", fname);
304
if(!(aip1 = AsnIoOpen(fname, "rb"))) {
305
printf("Seq-Entry File %s can not be opened!", fname);
312
root_sep = SeqEntryAsnRead(aip1, NULL);
314
aip1 = AsnIoClose(aip1);
316
AccessSeqEntry(root_sep);
319
/*-------------- Delete existing Org -------------------*/
320
/* Remove BioSource descriptors from a molecule graph's descriptor list.
 * For each node whose choice is biosource_choice_num the BioSource it points
 * to is released with BioSourceFree and the node is unlinked: through the
 * previous node when there is one, otherwise by moving mgp->descr to the
 * following node.
 * NOTE(review): whether the unlinked node itself is freed, and how
 * last_descr is advanced, is not visible here - confirm. */
DeleteOrg(MoleculeGraphPtr mgp)
322
BioSourcePtr my_biosource;
323
ValNodePtr descr, last_descr = NULL;
327
while(descr != NULL){
328
if(descr->choice == biosource_choice_num){
329
my_biosource = descr->data.ptrvalue;
330
my_biosource = BioSourceFree(my_biosource);
331
if(last_descr != NULL) {
332
last_descr->next = descr->next;
338
mgp->descr = descr->next;
352
/*------------------------------------------------------*/
353
/* Attach the BioSource collected for the current chain to a molecule graph.
 * The descriptor list is first scanned for an existing BioSource descriptor.
 * Then the collected chains are searched for one equal to current_chain; when
 * its biosource_chain[] entry is non-NULL, a copy made with AsnIoMemCopy is
 * appended to mgp->descr as a biosource_choice_num descriptor, otherwise a
 * message is printed.
 * NOTE(review): how OrgExist and an existing BioSource affect the copy is
 * not visible here - confirm. */
GetBioSource(MoleculeGraphPtr mgp)
357
Boolean OrgExist = FALSE;
359
BioSourcePtr dummy_bsourcep;
360
BioSourcePtr ThisBiosource;
365
while(descr != NULL){
366
if(descr->choice == biosource_choice_num){
367
ThisBiosource = descr->data.ptrvalue;
368
if(ThisBiosource != NULL) {
375
for(n = 0 ; n < nchain; n++){
376
if(current_chain == chain[n]){
378
if(biosource_chain[n] != NULL){
379
/* Copy the BioSource so the Biostruc does not share the Seq-entry's
 * object. */
dummy_bsourcep = AsnIoMemCopy(biosource_chain[n], (AsnReadFunc)BioSourceAsnRead, (AsnWriteFunc) BioSourceAsnWrite);
381
ValNodeAddPointer(&mgp->descr, biosource_choice_num, dummy_bsourcep);
385
printf("BioSourcePtr biosource_chain is NULL\n");
395
/*------------------------------------------------------*/
396
/* Walk the molecule graphs of a Biostruc and assign a BioSource to each
 * biopolymer / nonpolymer molecule.
 * For every molecule graph the descriptor list is scanned for the chain name
 * (first character stored in current_chain) and the molecule type;
 * GetBioSource is called when the type is protein, dna, rna,
 * other-biopolymer or other-nonpolymer. The walk stops once num_assign
 * equals nchain.
 * NOTE(review): the ValNodeNew(NULL) / mgp->descr = descr->next sequence
 * near the end depends on lines not visible here; as shown it would replace
 * the descriptor list with the next pointer of a fresh node - confirm the
 * intent.
 * NOTE(review): where num_assign is advanced is not visible here. */
AccessBiostruc(BiostrucPtr my_bstrucp)
399
BiostrucGraphPtr bgp;
400
MoleculeGraphPtr mgp;
407
bgp = my_bstrucp->chemical_graph;
408
mgp = bgp->molecule_graphs;
411
if(mgp->descr != NULL){
413
while(descr != NULL){
414
if(descr->choice == chain_choice_num){
415
chainp = descr->data.ptrvalue;
416
current_chain = *chainp;
418
if(descr->choice == molecule_type_num) molecule_type = descr->data.intvalue;
424
if( molecule_type == Molecule_type_protein || molecule_type == Molecule_type_dna || molecule_type == Molecule_type_rna || molecule_type == Molecule_type_other_biopolymer || molecule_type == Molecule_type_other_nonpolymer) GetBioSource(mgp);
427
descr = ValNodeNew(NULL);
430
mgp->descr = descr->next;
433
if(num_assign == nchain ) break;
434
else mgp = mgp->next;
442
/*------------------------------------------------------*/
443
/* Read the Biostruc for entry `code`, run AccessBiostruc on it, and write
 * the result to the output file.
 * Input path: directory from myargs[2] or myargs[3], followed by "a" or "b"
 * and the entry code, opened with mode "r" or "rb". Output path: directory
 * from myargs[4] or myargs[5], followed by "a" or "b" and the entry code,
 * opened with mode "w" or "wb". Progress is reported on stdout and in the
 * log stream afp. On completion both root_bstrucp and root_sep are freed.
 * NOTE(review): presumably TextInStr / TextOutStr select between the text
 * and binary variants; the selecting lines are not visible here - confirm.
 * NOTE(review): the StringCpy / StringCat calls do not bound the length of
 * fname - confirm the buffer is large enough. */
GoToBiostruc(Char code[120])
451
StringCpy(fname, myargs[2].strvalue);
452
StringCat(fname, "a");
455
StringCpy(fname, myargs[3].strvalue);
456
StringCat(fname, "b");
459
StringCat(fname, code);
460
/* StringCat(fname, ".ASN1"); */
463
if(!(aip2 = AsnIoOpen(fname, "r"))) {
464
printf("Can not open Input Biostruc File %s\n", fname);
465
fprintf(afp, "Org has NOT been copied in!\n\n");
470
if(!(aip2 = AsnIoOpen(fname, "rb"))) {
471
printf("Can not open Input Biostruc File %s\n", fname);
472
fprintf(afp, "Org has NOT been copied in!\n\n");
477
root_bstrucp = BiostrucAsnRead(aip2, NULL);
478
aip2 = AsnIoClose(aip2);
482
StringCpy(fname, myargs[4].strvalue);
483
StringCat(fname, "a");
486
StringCpy(fname, myargs[5].strvalue);
487
StringCat(fname, "b");
490
StringCat(fname, code);
491
/* StringCat(fname, ".ASN1"); */
494
if(!(aip3 = AsnIoOpen(fname, "w"))) {
495
/* The format has a %s conversion, so the file name must be supplied. */
printf("File %s can not be opened!\n", fname);
496
fprintf(afp, "Org has NOT been copied in!\n\n");
501
if(!(aip3 = AsnIoOpen(fname, "wb"))) {
502
printf("File %s can not be opened!\n", fname);
503
fprintf(afp, "Org has NOT been copied in!\n\n");
508
/* root_bstrucp = BiostrucAsnRead(aip2, NULL);
509
aip2 = AsnIoClose(aip2); */
511
AccessBiostruc(root_bstrucp);
513
BiostrucAsnWrite(root_bstrucp, aip3, NULL);
514
aip3 = AsnIoClose(aip3);
515
fprintf(afp, "Org has been copied in!\n\n");
517
root_bstrucp = BiostrucFree(root_bstrucp);
518
root_sep = SeqEntryFree(root_sep);
523
/*---------------- Main Starts Here ------------------*/
526
Char fname[120], code[120];
530
InitArg(GetArgc(), GetArgv());
532
afp = FileOpen(myargs[8].strvalue, "w");
534
StringCpy(fname, myargs[6].strvalue);
535
StringCat(fname, myargs[7].strvalue);
537
CodeList = FileOpen(fname, "r");
538
if(CodeList == NULL){
539
printf("Problem reading file: %s \n", fname);
543
while(fscanf(CodeList, "%s", code) != EOF){
544
fprintf(afp, "\tEntry: %s\n", code);
546
printf("%s\n", code);
549
if(SeqOpen) GoToBiostruc(code);