329
369
Usage : $stream->write_seq($seq)
330
370
Function: writes the $seq object (must be seq) to the stream
331
371
Returns : 1 for success and 0 for error
372
Args : array of 1 to n Bio::SeqI objects
338
my ($self,$seq) = @_;
340
if( !defined $seq ) {
341
$self->throw("Attempting to write with no seq!");
344
if( ! ref $seq || ! $seq->isa('Bio::SeqI') ) {
345
$self->warn(" $seq is not a SeqI compliant module. Attempting to dump, but may fail!");
353
my $len = $seq->length();
355
if ( !$seq->can('division') || ! defined ($div = $seq->division()) ) {
359
if( ! $seq->can('alphabet') || ! defined ($mol = $seq->alphabet) ) {
364
if( $self->_id_generation_func ) {
365
$temp_line = &{$self->_id_generation_func}($seq);
367
#$temp_line = sprintf ("%10s STANDARD; %3s; %d AA.",
368
# $seq->primary_id()."_".$div,$mol,$len);
369
# Reconstructing the ID relies heavily upon the input source having
370
# been in a format that is parsed as this routine expects it -- that is,
371
# by this module itself. This is bad, I think, and immediately breaks
372
# if e.g. the Bio::DB::GenPept module is used as input.
373
# Hence, switch to display_id(); _every_ sequence is supposed to have
374
# this. HL 2000/09/03
375
$mol =~ s/protein/PRT/;
376
$temp_line = sprintf ("%10s STANDARD; %3s; %d AA.",
377
$seq->display_id(), $mol, $len);
380
$self->_print( "ID $temp_line\n");
382
# if there, write the accession line
383
local($^W) = 0; # supressing warnings about uninitialized fields
385
if( $self->_ac_generation_func ) {
386
$temp_line = &{$self->_ac_generation_func}($seq);
387
$self->_print( "AC $temp_line\n");
389
if ($seq->can('accession_number') ) {
390
$self->_print("AC ",$seq->accession_number,";");
391
if ($seq->can('get_secondary_accessions') ) {
392
foreach my $sacc ($seq->get_secondary_accessions) {
393
$self->_print(" ",$sacc,";");
401
# otherwise - cannot print <sigh>
406
if( $seq->can('get_dates') ) {
407
foreach my $dt ( $seq->get_dates() ) {
408
$self->_write_line_swissprot_regex("DT ","DT ",
414
$self->_write_line_swissprot_regex("DE ","DE ",$seq->desc(),"\\s\+\|\$",80);
417
if ($seq->annotation->can('get_Annotations') &&
418
(my @genes = $seq->annotation->get_Annotations('gene_name') ) ) {
419
$self->_print("GN ",join(' OR ', map { $_->value } @genes),".\n");
423
if ($seq->can('species') && (my $spec = $seq->species)) {
424
my($species, @class) = $spec->classification();
425
my $genus = $class[0];
426
my $OS = "$genus $species";
427
if (my $ssp = $spec->sub_species) {
430
if (my $common = $spec->common_name) {
431
$OS .= " ($common).";
433
if ($class[$#class] =~ /viruses/i) { # different OS / OC syntax
434
$OS = $spec->common_name; # for viruses LP 09/16/2000
435
unshift @class, $species;
437
$self->_print( "OS $OS\n");
438
my $OC = join('; ', reverse(@class)) .'.';
439
$self->_write_line_swissprot_regex("OC ","OC ",$OC,"\; \|\$",80);
440
if ($spec->organelle) {
441
$self->_write_line_swissprot_regex("OG ","OG ",$spec->organelle,"\; \|\$",80);
443
if ($spec->ncbi_taxid) {
444
$self->_print("OX NCBI_TaxID=".$spec->ncbi_taxid.";\n");
450
foreach my $ref ( $seq->annotation->get_Annotations('reference') ) {
451
$self->_print( "RN [$t]\n");
452
# if ($ref->start && $ref->end) {
453
# # changed by jason <jason@chg.mc.duke.edu> on 3/16/00
454
# print "RP SEQUENCE OF ",$ref->start,"-",$ref->end," FROM N.A.\n";
456
# $self->_print("RP SEQUENCE OF ",$ref->start,"-",$ref->end," FROM N.A.\n");
459
# # changed by jason <jason@chg.mc.duke.edu> on 3/16/00
460
# print "RP ",$ref->rp,"\n";
463
# changed by lorenz 08/03/00
464
# j.gilbert and h.lapp agreed that the rp line in swissprot seems
465
# more like a comment than a parseable value, so print it as is
467
$self->_write_line_swissprot_regex("RP ","RP ",$ref->rp,
471
$self->_write_line_swissprot_regex("RC ","RC ",$ref->comment,
475
# new RX format in swissprot LP 09/17/00
477
$self->_write_line_swissprot_regex("RX ","RX ",
478
"MEDLINE=".$ref->medline.
479
"; PubMed=".$ref->pubmed.";",
482
$self->_write_line_swissprot_regex("RX MEDLINE; ","RX MEDLINE; ",
483
$ref->medline.".","\\s\+\|\$",80);
486
my $author = $ref->authors .';' if($ref->authors);
487
my $title = $ref->title .';' if( $ref->title);
489
$self->_write_line_swissprot_regex("RA ","RA ",$author,"\\s\+\|\$",80);
490
$self->_write_line_swissprot_regex("RT ","RT ",$title,"\\s\+\|\$",80);
491
$self->_write_line_swissprot_regex("RL ","RL ",$ref->location,"\\s\+\|\$",80);
497
foreach my $comment ( $seq->annotation->get_Annotations('comment') ) {
498
foreach my $cline (split ("\n", $comment->text)) {
499
while (length $cline > 74) {
500
$self->_print("CC ",(substr $cline,0,74),"\n");
501
$cline = substr $cline,74;
503
$self->_print("CC ",$cline,"\n");
507
foreach my $dblink ( $seq->annotation->get_Annotations('dblink') )
509
if (defined($dblink->comment)&&($dblink->comment)) {
510
$self->_print("DR ",$dblink->database,"; ",$dblink->primary_id,"; ",
511
$dblink->optional_id,"; ",$dblink->comment,".\n");
512
} elsif($dblink->optional_id) {
513
$self->_print("DR ",$dblink->database,"; ",$dblink->primary_id,"; ",
514
$dblink->optional_id,".\n");
517
$self->_print("DR ",$dblink->database,"; ",$dblink->primary_id,"; ",
522
# if there, write the kw line
524
if( $self->_kw_generation_func ) {
525
$temp_line = &{$self->_kw_generation_func}($seq);
526
$self->_print( "KW $temp_line\n");
528
if( $seq->can('keywords') ) {
529
$self->_write_line_swissprot_regex("KW ","KW ",
530
$seq->keywords,"\\s\+\|\$",80);
534
#Check if there is seqfeatures before printing the FT line
535
my @feats = $seq->top_SeqFeatures;
537
if( defined $self->_post_sort ) {
539
# we need to read things into an array. Process. Sort them. Print 'em
541
my $post_sort_func = $self->_post_sort();
544
my @feats = $seq->top_SeqFeatures;
546
foreach my $sf ( $seq->top_SeqFeatures ) {
547
push(@fth,Bio::SeqIO::FTHelper::from_SeqFeature($sf,$seq));
549
@fth = sort { &$post_sort_func($a,$b) } @fth;
551
foreach my $fth ( @fth ) {
552
$self->_print_swissprot_FTHelper($fth);
555
# not post sorted. And so we can print as we get them.
556
# lower memory load...
558
foreach my $sf ( $seq->top_SeqFeatures ) {
559
my @fth = Bio::SeqIO::FTHelper::from_SeqFeature($sf,$seq);
560
foreach my $fth ( @fth ) {
561
if( ! $fth->isa('Bio::SeqIO::FTHelper') ) {
562
$sf->throw("Cannot process FTHelper... $fth");
565
$self->_print_swissprot_FTHelper($fth);
570
if( $self->_show_dna() == 0 ) {
574
# finished printing features.
577
my $mw = ${Bio::Tools::SeqStats->get_mol_wt($seq->primary_seq)}[0];
579
# was crc32 checksum, changed it to crc64
580
my $crc64 = $self->_crc64(\$str);
581
$self->_print( sprintf("SQ SEQUENCE %4d AA; %d MW; %16s CRC64;\n",
585
for ($i = 0; $i < length($str); $i += 10) {
586
$self->_print( substr($str,$i,10), " ");
588
if( ($i+10)%60 == 0 && (($i+10) < length($str))) {
589
$self->_print( "\n ");
592
$self->_print( "\n//\n");
378
my ($self,@seqs) = @_;
379
foreach my $seq ( @seqs ) {
380
$self->throw("Attempting to write with no seq!") unless defined $seq;
382
if( ! ref $seq || ! $seq->isa('Bio::SeqI') ) {
383
$self->warn(" $seq is not a SeqI compliant module. Attempting to dump, but may fail!");
391
my $len = $seq->length();
393
if ( !$seq->can('division') || ! defined ($div = $seq->division()) ) {
397
if( ! $seq->can('alphabet') || ! defined ($mol = $seq->alphabet) ) {
401
$self->warn("No whitespace allowed in SWISS-PROT display id [". $seq->display_id. "]")
402
if $seq->display_id =~ /\s/;
405
if( $self->_id_generation_func ) {
406
$temp_line = &{$self->_id_generation_func}($seq);
408
#$temp_line = sprintf ("%10s STANDARD; %3s; %d AA.",
409
# $seq->primary_id()."_".$div,$mol,$len);
410
# Reconstructing the ID relies heavily upon the input source having
411
# been in a format that is parsed as this routine expects it -- that is,
412
# by this module itself. This is bad, I think, and immediately breaks
413
# if e.g. the Bio::DB::GenPept module is used as input.
414
# Hence, switch to display_id(); _every_ sequence is supposed to have
415
# this. HL 2000/09/03
416
$mol =~ s/protein/PRT/;
417
$temp_line = sprintf ("%-10s STANDARD; %3s; %d AA.",
418
$seq->display_id(), $mol, $len);
421
$self->_print( "ID $temp_line\n");
423
# if there, write the accession line
424
local($^W) = 0; # supressing warnings about uninitialized fields
426
if( $self->_ac_generation_func ) {
427
$temp_line = &{$self->_ac_generation_func}($seq);
428
$self->_print( "AC $temp_line\n");
430
if ($seq->can('accession_number') ) {
431
$self->_print("AC ",$seq->accession_number,";");
432
if ($seq->can('get_secondary_accessions') ) {
433
foreach my $sacc ($seq->get_secondary_accessions) {
434
$self->_print(" ",$sacc,";");
442
# otherwise - cannot print <sigh>
447
if( $seq->can('get_dates') ) {
448
foreach my $dt ( $seq->get_dates() ) {
449
$self->_write_line_swissprot_regex("DT ","DT ",
455
$self->_write_line_swissprot_regex("DE ","DE ",$seq->desc(),"\\s\+\|\$",80);
458
if ((my @genes = $seq->annotation->get_Annotations('gene_name') ) ) {
462
$_->isa("Bio::Annotation::StructuredValue") ?
463
$_->value(-joins => [" AND ", " OR "]) :
470
if ($seq->can('species') && (my $spec = $seq->species)) {
471
my($species, @class) = $spec->classification();
472
my $genus = $class[0];
473
my $OS = "$genus $species";
474
if ($class[-1] eq 'Viruses') {
476
$OS .= " ". $spec->sub_species if $spec->sub_species;
478
if ($class[$#class] =~ /viruses/i) {
479
# different OS / OC syntax for viruses LP 09/16/2000
482
if (my $ssp = $spec->sub_species) {
485
foreach (($spec->variant, $spec->common_name)) {
486
$OS .= " ($_)" if $_;
489
$self->_print( "OS $OS.\n");
490
my $OC = join('; ', reverse(@class)) .'.';
491
$self->_write_line_swissprot_regex("OC ","OC ",$OC,"\; \|\$",80);
492
if ($spec->organelle) {
493
$self->_write_line_swissprot_regex("OG ","OG ",$spec->organelle,"\; \|\$",80);
495
if ($spec->ncbi_taxid) {
496
$self->_print("OX NCBI_TaxID=".$spec->ncbi_taxid.";\n");
502
foreach my $ref ( $seq->annotation->get_Annotations('reference') ) {
503
$self->_print( "RN [$t]\n");
504
# changed by lorenz 08/03/00
505
# j.gilbert and h.lapp agreed that the rp line in swissprot seems
506
# more like a comment than a parseable value, so print it as is
508
$self->_write_line_swissprot_regex("RP ","RP ",$ref->rp,
512
$self->_write_line_swissprot_regex("RC ","RC ",$ref->comment,
516
# new RX format in swissprot LP 09/17/00
518
$self->_write_line_swissprot_regex("RX ","RX ",
519
"MEDLINE=".$ref->medline.
520
"; PubMed=".$ref->pubmed.";",
523
$self->_write_line_swissprot_regex("RX MEDLINE; ","RX MEDLINE; ",
524
$ref->medline.".","\\s\+\|\$",80);
527
my $author = $ref->authors .';' if($ref->authors);
528
my $title = $ref->title .';' if( $ref->title);
530
$self->_write_line_swissprot_regex("RA ","RA ",$author,"\\s\+\|\$",80);
531
$self->_write_line_swissprot_regex("RT ","RT ",$title,"\\s\+\|\$",80);
532
$self->_write_line_swissprot_regex("RL ","RL ",$ref->location,"\\s\+\|\$",80);
538
foreach my $comment ( $seq->annotation->get_Annotations('comment') ) {
539
foreach my $cline (split ("\n", $comment->text)) {
540
while (length $cline > 74) {
541
$self->_print("CC ",(substr $cline,0,74),"\n");
542
$cline = substr $cline,74;
544
$self->_print("CC ",$cline,"\n");
548
foreach my $dblink ( $seq->annotation->get_Annotations('dblink') )
550
if (defined($dblink->comment)&&($dblink->comment)) {
551
$self->_print("DR ",$dblink->database,"; ",$dblink->primary_id,"; ",
552
$dblink->optional_id,"; ",$dblink->comment,".\n");
553
} elsif($dblink->optional_id) {
554
$self->_print("DR ",$dblink->database,"; ",
555
$dblink->primary_id,"; ",
556
$dblink->optional_id,".\n");
559
$self->_print("DR ",$dblink->database,
560
"; ",$dblink->primary_id,"; ","-.\n");
564
# if there, write the kw line
567
if( my $func = $self->_kw_generation_func ) {
568
$kw = &{$func}($seq);
569
} elsif( $seq->can('keywords') ) {
570
$kw = $seq->keywords;
571
if( ref($kw) =~ /ARRAY/i ) {
572
$kw = join("; ", @$kw);
574
$kw .= '.' if( $kw !~ /\.$/ );
576
$self->_write_line_swissprot_regex("KW ","KW ",
577
$kw, "\\s\+\|\$",80);
580
#Check if there is seqfeatures before printing the FT line
581
my @feats = $seq->can('top_SeqFeatures') ? $seq->top_SeqFeatures : ();
583
if( defined $self->_post_sort ) {
585
# we need to read things into an array. Process. Sort them. Print 'em
587
my $post_sort_func = $self->_post_sort();
590
foreach my $sf ( @feats ) {
591
push(@fth,Bio::SeqIO::FTHelper::from_SeqFeature($sf,$seq));
593
@fth = sort { &$post_sort_func($a,$b) } @fth;
595
foreach my $fth ( @fth ) {
596
$self->_print_swissprot_FTHelper($fth);
599
# not post sorted. And so we can print as we get them.
600
# lower memory load...
602
foreach my $sf ( @feats ) {
603
my @fth = Bio::SeqIO::FTHelper::from_SeqFeature($sf,$seq);
604
foreach my $fth ( @fth ) {
605
if( ! $fth->isa('Bio::SeqIO::FTHelper') ) {
606
$sf->throw("Cannot process FTHelper... $fth");
609
$self->_print_swissprot_FTHelper($fth);
614
if( $self->_show_dna() == 0 ) {
618
# finished printing features.
621
my $mw = ${Bio::Tools::SeqStats->get_mol_wt($seq->primary_seq)}[0];
623
# was crc32 checksum, changed it to crc64
624
my $crc64 = $self->_crc64(\$str);
625
$self->_print( sprintf("SQ SEQUENCE %4d AA; %d MW; %16s CRC64;\n",
629
for ($i = 0; $i < length($str); $i += 10) {
630
$self->_print( substr($str,$i,10), " ");
632
if( ($i+10)%60 == 0 && (($i+10) < length($str))) {
633
$self->_print( "\n ");
636
$self->_print( "\n//\n");
638
$self->flush if $self->_flush_on_write && defined $self->_fh;
596
643
# Thanks to James Gilbert for the following two. LP 08/01/2000