30
30
my $db = Bio::DB::Fasta->new('/path/to/fasta/files');
32
32
my $obj = $db->get_Seq_by_id('CHROMOSOME_I');
34
my $subseq = $obj->subseq(4_000_000 => 4_100_000);
33
my $seq = $obj->seq; # sequence string
34
my $subseq = $obj->subseq(4_000_000 => 4_100_000); # string
35
my $trunc = $obj->trunc(4_000_000 => 4_100_000); # seq object
35
36
my $length = $obj->length;
75
76
a sequence entry, all lines must be the same length except for the
79
An error will be thrown if this is not the case.
78
81
The module uses /^E<gt>(\S+)/ to extract the primary ID of each sequence
79
82
from the Fasta header. During indexing, you may pass a callback routine to
80
83
modify this primary ID. For example, you may wish to extract a
430
Usage : my $db = new Bio::DB::Fasta( $path, @options);
434
Usage : my $db = Bio::DB::Fasta->new( $path, @options);
431
435
Function: initialize a new Bio::DB::Fasta object
432
436
Returns : new Bio::DB::Fasta object
433
437
Args : path to dir of fasta files or a single filename
481
485
# that contain whitespace.
482
486
$path = Win32::GetShortPathName($path)
483
487
if $^O =~ /^MSWin/i && eval 'use Win32; 1';
484
$offsets = $self->index_dir($path,$opts{-reindex});
488
$offsets = $self->index_dir($path,$opts{-reindex}) or return;
485
489
$dirname = $path;
487
491
$offsets = $self->index_file($path,$opts{-reindex});
520
524
my $flags = $write ? O_CREAT|O_RDWR : O_RDONLY;
521
525
my @dbmargs = $self->dbmargs;
522
tie %offsets,'AnyDBM_File',$index,$flags,0644,@dbmargs
523
or $self->throw( "Can't open cache file $index: $!");
527
tie %offsets,'AnyDBM_File',$index,$flags,0644,@dbmargs
528
or die "Can't open sequence index file $index: $!";
524
531
return \%offsets;
732
740
my $fh = IO::File->new($file) or $self->throw( "Can't open $file: $!");
734
742
warn "indexing $file\n" if $self->{debug};
735
my ($offset,$id,$linelength,$type,$firstline,$count,$termination_length,$seq_lines,$last_line,%offsets);
743
my ($offset,@id,$linelength,$type,$firstline,$count,
744
$termination_length,$seq_lines,$last_line,%offsets);
745
my ($l3_len,$l2_len,$l_len)=(0,0,0);
736
747
while (<$fh>) { # don't try this at home
737
$termination_length ||= /\r\n$/ ? 2 : 1; # account for crlf-terminated Windows files
748
$termination_length ||= /\r\n$/ ? 2 : 1; # account for crlf-terminated Windows files
739
750
print STDERR "indexed $count sequences...\n"
740
751
if $self->{debug} && (++$count%1000) == 0;
741
752
my $pos = tell($fh);
743
754
my $seqlength = $pos - $offset - length($_);
744
755
$seqlength -= $termination_length * $seq_lines;
745
$offsets->{$id} = &{$self->{packmeth}}($offset,$seqlength,
746
$linelength,$firstline,
756
my $ppos = &{$self->{packmeth}}($offset,$seqlength,
757
$linelength,$firstline,
759
for my $id (@id) { $offsets->{$id} = $ppos }
749
$id = ref($self->{makeid}) eq 'CODE' ? $self->{makeid}->($_) : $1;
761
@id = ref($self->{makeid}) eq 'CODE' ? $self->{makeid}->($_) : $1;
750
762
($offset,$firstline,$linelength) = ($pos,length($_),0);
751
763
$self->_check_linelength($linelength);
764
($l3_len,$l2_len,$l_len)=(0,0,0);
767
$l3_len= $l2_len; $l2_len= $l_len; $l_len= length($_); # need to check every line :(
768
if (DIE_ON_MISSMATCHED_LINES &&
769
$l3_len>0 && $l2_len>0 && $l3_len!=$l2_len) {
770
my $fap= substr($_,0,20)."..";
771
$self->throw("Each line of the fasta entry must be the same length except the last.
772
Line above #$. '$fap' is $l2_len != $l3_len chars.");
754
774
$linelength ||= length($_);
755
775
$type ||= $self->_type($_);
761
781
$self->_check_linelength($linelength);
762
782
# deal with last entry
764
784
my $pos = tell($fh);
765
785
my $seqlength = $pos - $offset;
767
786
if ($linelength == 0) { # yet another pesky empty chr_random.fa file
770
789
if ($last_line !~ /\s$/) {
773
792
$seqlength -= $termination_length * $seq_lines;
775
$offsets->{$id} = &{$self->{packmeth}}($offset,$seqlength,
776
$linelength,$firstline,
794
my $ppos = &{$self->{packmeth}}($offset,$seqlength,
795
$linelength,$firstline,
797
for my $id (@id) { $offsets->{$id} = $ppos }
779
799
$offsets->{__termination_length} = $termination_length;
780
800
return \%offsets;