312
323
package Bio::SeqIO;
316
328
use Bio::Factory::FTLocationFactory;
317
329
use Bio::Seq::SeqBuilder;
318
330
use Bio::Tools::GuessSeqFormat;
321
use base qw(Bio::Root::Root Bio::Root::IO Bio::Factory::SequenceStreamI);
333
use parent qw(Bio::Root::Root Bio::Root::IO Bio::Factory::SequenceStreamI);
323
335
my %valid_alphabet_cache;
328
Usage : $stream = Bio::SeqIO->new(-file => $filename,
341
Usage : $stream = Bio::SeqIO->new(-file => 'sequences.fasta',
330
343
Function: Returns a new sequence stream
331
344
Returns : A Bio::SeqIO stream initialised with the appropriate format
332
345
Args : Named parameters:
334
-fh => filehandle to attach to
347
-fh => filehandle to attach to
335
348
-format => format
337
Additional arguments may be used to set factories and
338
builders involved in the sequence object creation. None of
339
these must be provided, they all have reasonable defaults.
340
-seqfactory the Bio::Factory::SequenceFactoryI object
341
-locfactory the Bio::Factory::LocationFactoryI object
342
-objbuilder the Bio::Factory::ObjectBuilderI object
350
Additional arguments may be used. They all have reasonable defaults
351
and are thus optional.
352
-alphabet => 'dna', 'rna', or 'protein'
353
-flush => 0 or 1 (default, flush filehandles after each write)
354
-seqfactory => sequence factory
355
-locfactory => location factory
356
-objbuilder => object builder
344
358
See L<Bio::SeqIO::Handler>
351
my ($caller,@args) = @_;
352
my $class = ref($caller) || $caller;
354
# or do we want to call SUPER on an object if $caller is an
356
if( $class =~ /Bio::SeqIO::(\S+)/ ) {
357
my ($self) = $class->SUPER::new(@args);
358
$self->_initialize(@args);
363
@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
365
unless( defined $param{-file} ||
366
defined $param{-fh} ||
367
defined $param{-string} ) {
368
$class->throw("file argument provided, but with an undefined value")
369
if exists $param{'-file'};
370
$class->throw("fh argument provided, but with an undefined value")
371
if exists $param{'-fh'};
372
$class->throw("string argument provided, but with an undefined value")
373
if exists($param{'-string'});
374
# $class->throw("No file, fh, or string argument provided"); # neither defined
377
my $format = $param{'-format'} ||
378
$class->_guess_format( $param{-file} || $ARGV[0] );
365
my ($caller,@args) = @_;
366
my $class = ref($caller) || $caller;
368
# or do we want to call SUPER on an object if $caller is an
370
if( $class =~ /Bio::SeqIO::(\S+)/ ) {
371
my ($self) = $class->SUPER::new(@args);
372
$self->_initialize(@args);
377
@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
379
unless( defined $param{-file} ||
380
defined $param{-fh} ||
381
defined $param{-string} ) {
382
$class->throw("file argument provided, but with an undefined value")
383
if exists $param{'-file'};
384
$class->throw("fh argument provided, but with an undefined value")
385
if exists $param{'-fh'};
386
$class->throw("string argument provided, but with an undefined value")
387
if exists($param{'-string'});
388
# $class->throw("No file, fh, or string argument provided"); # neither defined
391
my $format = $param{'-format'} ||
392
$class->_guess_format( $param{-file} || $ARGV[0] );
382
$format = Bio::Tools::GuessSeqFormat->new(-file => $param{-file}||$ARGV[0] )->guess;
383
} elsif ($param{-fh}) {
384
$format = Bio::Tools::GuessSeqFormat->new(-fh => $param{-fh}||$ARGV[0] )->guess;
387
# changed 1-3-11; no need to print out an empty string (only way this
388
# exception is triggered) - cjfields
389
$class->throw("Could not guess format from file/fh") unless $format;
390
$format = "\L$format"; # normalize capitalization to lower case
392
if ($format =~ /-/) {
393
($format, my $variant) = split('-', $format, 2);
394
push @args, (-variant => $variant);
396
return unless( $class->_load_format_module($format) );
397
return "Bio::SeqIO::$format"->new(@args);
396
$format = Bio::Tools::GuessSeqFormat->new(-file => $param{-file}||$ARGV[0] )->guess;
397
} elsif ($param{-fh}) {
398
$format = Bio::Tools::GuessSeqFormat->new(-fh => $param{-fh}||$ARGV[0] )->guess;
401
# changed 1-3-11; no need to print out an empty string (only way this
402
# exception is triggered) - cjfields
403
$class->throw("Could not guess format from file/fh") unless $format;
404
$format = "\L$format"; # normalize capitalization to lower case
406
if ($format =~ /-/) {
407
($format, my $variant) = split('-', $format, 2);
408
push @args, (-variant => $variant);
412
return unless( $class->_load_format_module($format) );
413
return "Bio::SeqIO::$format"->new(@args);
404
421
Usage : $fh = Bio::SeqIO->newFh(-file=>$filename,-format=>'Format')
405
Function: does a new() followed by an fh()
422
Function: Does a new() followed by an fh()
406
423
Example : $fh = Bio::SeqIO->newFh(-file=>$filename,-format=>'Format')
407
424
$sequence = <$fh>; # read a sequence object
408
425
print $fh $sequence; # write a sequence object
438
my $class = ref($self) || $self;
439
my $s = Symbol::gensym;
440
tie $$s,$class,$self;
455
my $class = ref($self) || $self;
456
my $s = Symbol::gensym;
457
tie $$s,$class,$self;
444
462
# _initialize is chained for all SeqIO classes
446
464
sub _initialize {
447
my($self, @args) = @_;
449
# flush is initialized by the Root::IO init
451
my ($seqfact,$locfact,$objbuilder, $alphabet) =
452
$self->_rearrange([qw(SEQFACTORY
458
$locfact = Bio::Factory::FTLocationFactory->new(-verbose => $self->verbose)
460
$objbuilder = Bio::Seq::SeqBuilder->new(-verbose => $self->verbose)
462
$self->sequence_builder($objbuilder);
463
$self->location_factory($locfact);
465
# note that this should come last because it propagates the sequence
466
# factory to the sequence builder
467
$seqfact && $self->sequence_factory($seqfact);
465
my($self, @args) = @_;
467
# flush is initialized by the Root::IO init
469
my ($seqfact,$locfact,$objbuilder, $alphabet) =
470
$self->_rearrange([qw(SEQFACTORY
476
$locfact = Bio::Factory::FTLocationFactory->new(-verbose => $self->verbose)
478
$objbuilder = Bio::Seq::SeqBuilder->new(-verbose => $self->verbose)
480
$self->sequence_builder($objbuilder);
481
$self->location_factory($locfact);
483
# note that this should come last because it propagates the sequence
484
# factory to the sequence builder
485
$seqfact && $self->sequence_factory($seqfact);
470
488
$alphabet && $self->alphabet($alphabet);
473
# initialize the IO part
474
$self->_initialize_io(@args);
490
# initialize the IO part
491
$self->_initialize_io(@args);
538
570
my ($self, $value) = @_;
540
572
if ( defined $value) {
542
unless ($valid_alphabet_cache{$value}) {
543
# instead of hard-coding the allowed values once more, we check by
544
# creating a dummy sequence object
546
require Bio::PrimarySeq;
547
my $seq = Bio::PrimarySeq->new('-verbose' => $self->verbose,
548
'-alphabet' => $value);
551
$self->throw("Invalid alphabet: $value\n. See Bio::PrimarySeq for allowed values.");
553
$valid_alphabet_cache{$value} = 1;
555
$self->{'alphabet'} = $value;
574
unless ($valid_alphabet_cache{$value}) {
575
# instead of hard-coding the allowed values once more, we check by
576
# creating a dummy sequence object
578
require Bio::PrimarySeq;
579
my $seq = Bio::PrimarySeq->new('-verbose' => $self->verbose,
580
'-alphabet' => $value);
583
$self->throw("Invalid alphabet: $value\n. See Bio::PrimarySeq for allowed values.");
585
$valid_alphabet_cache{$value} = 1;
587
$self->{'alphabet'} = $value;
557
589
return $self->{'alphabet'};
560
593
=head2 _load_format_module
562
595
Title : _load_format_module
571
604
sub _load_format_module {
572
my ($self, $format) = @_;
573
my $module = "Bio::SeqIO::" . $format;
605
my ($self, $format) = @_;
606
my $module = "Bio::SeqIO::" . $format;
577
$ok = $self->_load_module($module);
610
$ok = $self->_load_module($module);
581
614
$self: $format cannot be found
583
616
For more information about the SeqIO system please see the SeqIO docs.
584
617
This includes ways of checking for formats at compile time, not run time
591
625
=head2 _concatenate_lines
593
627
Title : _concatenate_lines
643
678
sub _guess_format {
644
679
my $class = shift;
645
680
return unless $_ = shift;
646
return 'abi' if /\.ab[i1]$/i;
647
return 'ace' if /\.ace$/i;
648
return 'alf' if /\.alf$/i;
649
return 'bsml' if /\.(bsm|bsml)$/i;
650
return 'ctf' if /\.ctf$/i;
651
return 'embl' if /\.(embl|ebl|emb|dat)$/i;
652
return 'entrezgene' if /\.asn$/i;
653
return 'exp' if /\.exp$/i;
654
return 'fasta' if /\.(fasta|fast|fas|seq|fa|fsa|nt|aa|fna|faa)$/i;
655
return 'fastq' if /\.fastq$/i;
656
return 'gcg' if /\.gcg$/i;
657
return 'genbank' if /\.(gb|gbank|genbank|gbk|gbs)$/i;
658
return 'phd' if /\.(phd|phred)$/i;
659
return 'pir' if /\.pir$/i;
660
return 'pln' if /\.pln$/i;
661
return 'qual' if /\.qual$/i;
662
return 'raw' if /\.txt$/i;
663
return 'scf' if /\.scf$/i;
664
return 'swiss' if /\.(swiss|sp)$/i;
681
return 'abi' if /\.ab[i1]$/i;
682
return 'ace' if /\.ace$/i;
683
return 'alf' if /\.alf$/i;
684
return 'bsml' if /\.(bsm|bsml)$/i;
685
return 'ctf' if /\.ctf$/i;
686
return 'embl' if /\.(embl|ebl|emb|dat)$/i;
687
return 'entrezgene' if /\.asn$/i;
688
return 'exp' if /\.exp$/i;
689
return 'fasta' if /\.(fasta|fast|fas|seq|fa|fsa|nt|aa|fna|faa)$/i;
690
return 'fastq' if /\.fastq$/i;
691
return 'gcg' if /\.gcg$/i;
692
return 'genbank' if /\.(gb|gbank|genbank|gbk|gbs)$/i;
693
return 'phd' if /\.(phd|phred)$/i;
694
return 'pir' if /\.pir$/i;
695
return 'pln' if /\.pln$/i;
696
return 'qual' if /\.qual$/i;
697
return 'raw' if /\.txt$/i;
698
return 'scf' if /\.scf$/i;
699
return 'swiss' if /\.(swiss|sp)$/i;
666
701
# from Strider 1.4 Release Notes: The file name extensions used by
667
702
# Strider 1.4 are ".xdna", ".xdgn", ".xrna" and ".xprt" for DNA,
668
703
# DNA Degenerate, RNA and Protein Sequence Files, respectively
669
return 'strider' if /\.(xdna|xdgn|xrna|xprt)$/i;
704
return 'strider' if /\.(xdna|xdgn|xrna|xprt)$/i;
671
return 'ztr' if /\.ztr$/i;
706
return 'ztr' if /\.ztr$/i;
680
my ($class,$val) = @_;
681
return bless {'seqio' => $val}, $class;
717
my ($class,$val) = @_;
718
return bless {'seqio' => $val}, $class;
686
return $self->{'seqio'}->next_seq() unless wantarray;
688
push @list, $obj while $obj = $self->{'seqio'}->next_seq();
724
return $self->{'seqio'}->next_seq() unless wantarray;
726
push @list, $obj while $obj = $self->{'seqio'}->next_seq();
694
$self->{'seqio'}->write_seq(@_);
733
$self->{'seqio'}->write_seq(@_);
697
737
=head2 sequence_factory
699
739
Title : sequence_factory
707
sub sequence_factory{
708
my ($self,$obj) = @_;
747
sub sequence_factory {
748
my ($self, $obj) = @_;
709
749
if( defined $obj ) {
710
if( ! ref($obj) || ! $obj->isa('Bio::Factory::SequenceFactoryI') ) {
711
$self->throw("Must provide a valid Bio::Factory::SequenceFactoryI object to ".ref($self)."::sequence_factory()");
713
$self->{'_seqio_seqfactory'} = $obj;
714
my $builder = $self->sequence_builder();
715
if($builder && $builder->can('sequence_factory') &&
716
(! $builder->sequence_factory())) {
717
$builder->sequence_factory($obj);
750
if( ! ref($obj) || ! $obj->isa('Bio::Factory::SequenceFactoryI') ) {
751
$self->throw("Must provide a valid Bio::Factory::SequenceFactoryI object to ".ref($self)."::sequence_factory()");
753
$self->{'_seqio_seqfactory'} = $obj;
754
my $builder = $self->sequence_builder();
755
if($builder && $builder->can('sequence_factory') &&
756
(! $builder->sequence_factory())) {
757
$builder->sequence_factory($obj);
720
760
$self->{'_seqio_seqfactory'};
723
764
=head2 object_factory
725
766
Title : object_factory